예제 #1
0
    def test_classification_test_dataset(self):
        """Train Pegasos with the tuned hyperparameters and report test results.

        Fits Pegasos on the training bag-of-words features, prints the accuracy
        of its predictions on the test set, and then prints the first ten and
        last ten entries of the ranking returned by
        ``utils.most_explanatory_word`` for the learned weights.
        """
        # Hyperparameters picked according to the validation accuracies.
        epochs, reg = 25, 0.01
        weights, bias = p1.pegasos(
            feature_matrix=train_bow_features,
            labels=train_labels,
            T=epochs,
            L=reg,
        )
        predictions = p1.classify(test_bow_features, weights, bias)
        accuracy = p1.accuracy(predictions, test_labels)
        print(f'Accuracy on test data : {accuracy}')

        # Only the weight vector (not the bias) is used to rank the words.
        best_theta = weights
        index_word_pairs = sorted((idx, word) for word, idx in dictionary.items())
        wordlist = [word for _, word in index_word_pairs]
        sorted_word_features = utils.most_explanatory_word(best_theta, wordlist)
        print("Most Explanatory Word Features")
        print(sorted_word_features[:10])
        print("Least Explanatory Word Features")
        print(sorted_word_features[-10:])
예제 #2
0
파일: main.py 프로젝트: yshen4/pymal
def problem5(T=10, L=0.2):
    """Fit the three linear classifiers on the toy data and plot each result.

    Args:
        T: forwarded as the ``T`` argument of every training routine.
        L: forwarded as the ``L`` argument of ``p1.pegasos``.
    """
    toy_data = utils.load_toy_data('toy_data.tsv')
    toy_features, toy_labels = toy_data

    # All three models are trained before the first plot is produced.
    trained = [
        ('Perceptron', p1.perceptron(toy_features, toy_labels, T)),
        ('Average Perceptron', p1.average_perceptron(toy_features, toy_labels, T)),
        ('Pegasos', p1.pegasos(toy_features, toy_labels, T, L)),
    ]
    for algo_name, thetas in trained:
        plot_toy_results(algo_name, thetas, toy_features, toy_labels)
예제 #3
0
    def test_algorithm_compare(self):
        """Problem 5: fit the three classifiers on the toy data and show each result.

        Each algorithm's parameters are printed and then forwarded to
        ``utils.plot_toy_data`` together with the toy features and labels.
        """
        toy_data = utils.load_toy_data('toy_data.tsv')
        toy_features, toy_labels = toy_data

        T, L = 100, 0.2

        def plot_toy_results(algo_name, thetas):
            # thetas[0] is printed element by element, thetas[1] as a single value.
            theta_text = ', '.join(str(value) for value in thetas[0])
            print('theta for', algo_name, 'is', theta_text)
            print('theta_0 for', algo_name, 'is', thetas[1])
            utils.plot_toy_data(algo_name, toy_features, toy_labels, thetas)

        # Train everything first; plotting starts only once all models are fitted.
        trained = [
            ('Perceptron', p1.perceptron(toy_features, toy_labels, T)),
            ('Average Perceptron', p1.average_perceptron(toy_features, toy_labels, T)),
            ('Pegasos', p1.pegasos(toy_features, toy_labels, T, L)),
        ]
        for algo_name, thetas in trained:
            plot_toy_results(algo_name, thetas)
예제 #4
0
utils.plot_tune_results('Pegasos', 'T', Ts, *peg_tune_results_T)
utils.plot_tune_results('Pegasos', 'L', Ls, *peg_tune_results_L)
'''

#-------------------------------------------------------------------------------
# Use the best method (perceptron, average perceptron or Pegasos) along with
# the optimal hyperparameters according to validation accuracies to test
# against the test dataset. The test data has been provided as
# test_bow_features and test_labels.
#-------------------------------------------------------------------------------

# Your code here

# Hyperparameters used for the final Pegasos run.
T = 25
L = 0.01
# Fit on the training split, then classify the test split with the result.
th, th_0 = p1.pegasos(train_bow_features, train_labels, T, L)
rezul = p1.classify(test_bow_features, th, th_0)
# Print the accuracy of the test predictions with four decimal places.
print('REZ = {:.4f}'.format(p1.accuracy(rezul, test_labels)))

#-------------------------------------------------------------------------------
# Assign to best_theta, the weights (and not the bias!) learned by your most
# accurate algorithm with the optimal choice of hyperparameters.
#-------------------------------------------------------------------------------
'''
T = 25
L = 0.01
th, th_0 = p1.pegasos(train_bow_features , train_labels, T, L)
best_theta = th
wordlist   = [word for (idx, word) in sorted(zip(dictionary.values(), dictionary.keys()))]
sorted_word_features = utils.most_explanatory_word(best_theta, wordlist)
print("Most Explanatory Word Features")
예제 #5
0
 def train_fn(features, labels, T):
     """Call ``p1.pegasos`` on ``features``/``labels`` with ``T`` and ``best_L``.

     ``best_L`` is not a parameter: it is read from the enclosing scope when the
     function is called and passed as the fourth positional argument.
     Returns whatever ``p1.pegasos`` returns.
     """
     return p1.pegasos(features, labels, T, best_L)
예제 #6
0
# utils.plot_tune_results('Pegasos', 'T', Ts, *peg_tune_results_T)
# utils.plot_tune_results('Pegasos', 'L', Ls, *peg_tune_results_L)

#-------------------------------------------------------------------------------
# Use the best method (perceptron, average perceptron or Pegasos) along with
# the optimal hyperparameters according to validation accuracies to test
# against the test dataset. The test data has been provided as
# test_bow_features and test_labels.
#-------------------------------------------------------------------------------
# Hyperparameters selected for the final Pegasos run.
T = 25
L = 0.0100
# The evaluation data passed here is the *test* split, so the second value
# returned is a test accuracy. The variable name is kept unchanged because
# code outside this section may read it.
avg_peg_train_accuracy, avg_peg_val_accuracy = p1.classifier_accuracy(
    p1.pegasos,
    train_bow_features,
    test_bow_features,
    train_labels,
    test_labels,
    T=T,
    L=L,
)
print("{:50} {:.4f}".format("Training accuracy for Pegasos:", avg_peg_train_accuracy))
# Label fixed: this number is measured on the test split, not the validation split.
print("{:50} {:.4f}".format("Test accuracy for Pegasos:", avg_peg_val_accuracy))
# Train once more to get hold of the learned parameters themselves.
thetas_pegasos = p1.pegasos(train_bow_features, train_labels, T, L)
# print(thetas_pegasos)
#-------------------------------------------------------------------------------
# Assign to best_theta, the weights (and not the bias!) learned by your most
# accurate algorithm with the optimal choice of hyperparameters.
#-------------------------------------------------------------------------------

# Keep only the first element of the Pegasos result (the weights, not the bias).
best_theta = thetas_pegasos[0]
# Vocabulary words ordered by the index stored for them in `dictionary`.
wordlist = [word for (idx, word) in sorted(zip(dictionary.values(), dictionary.keys()))]
# Pass the index-ordered word list itself. Re-running bag_of_words over it
# would hand most_explanatory_word a freshly built dictionary instead of the
# list that was just put in index order.
sorted_word_features = utils.most_explanatory_word(best_theta, wordlist)
print("Most Explanatory Word Features")
print(sorted_word_features[:10])
#-------------------------------------------------------------------------------


예제 #7
0
# Bag-of-words feature matrices for the train / validation / test texts.
train_bow_features, val_bow_features, test_bow_features = [
    p1.extract_bow_feature_vectors(texts, dictionary)
    for texts in (train_texts, val_texts, test_texts)
]

#-------------------------------------------------------------------------------
# Problem 5
#-------------------------------------------------------------------------------

# Load the toy set once, then unpack it into features and labels.
toy_data = utils.load_toy_data('toy_data.tsv')
toy_features, toy_labels = toy_data

T, L = 2000, 0.2

# Train each algorithm on the same toy data.
thetas_perceptron = p1.perceptron(toy_features, toy_labels, T)
thetas_avg_perceptron = p1.average_perceptron(toy_features, toy_labels, T)
thetas_pegasos = p1.pegasos(toy_features, toy_labels, T, L)

def plot_toy_results(algo_name, thetas):
    """Print the parameters learned by ``algo_name`` and plot them on the toy data.

    ``thetas[0]`` is printed element by element as theta and ``thetas[1]`` as
    theta_0; ``thetas`` is then passed whole to ``utils.plot_toy_data`` along
    with the module-level toy features and labels.
    """
    theta_text = ', '.join(str(value) for value in thetas[0])
    print('theta for', algo_name, 'is', theta_text)
    print('theta_0 for', algo_name, 'is', thetas[1])
    utils.plot_toy_data(algo_name, toy_features, toy_labels, thetas)

plot_toy_results('Perceptron', thetas_perceptron)
plot_toy_results('Average Perceptron', thetas_avg_perceptron)
plot_toy_results('Pegasos', thetas_pegasos)

#-------------------------------------------------------------------------------
# Problem 7
#-------------------------------------------------------------------------------

T = 10
예제 #8
0
# One bag-of-words feature matrix per data split, built from the same dictionary.
train_bow_features, val_bow_features, test_bow_features = (
    p1.extract_bow_feature_vectors(texts, dictionary)
    for texts in (train_texts, val_texts, test_texts)
)

#-------------------------------------------------------------------------------
# Problem 5
#-------------------------------------------------------------------------------

toy_data = utils.load_toy_data('toy_data.tsv')
toy_features, toy_labels = toy_data

T, L = 10, 0.2

# The perceptron variants take T only; Pegasos additionally takes L.
thetas_perceptron = p1.perceptron(toy_features, toy_labels, T)
thetas_avg_perceptron = p1.average_perceptron(toy_features, toy_labels, T)
thetas_pegasos = p1.pegasos(toy_features, toy_labels, T, L)


def plot_toy_results(algo_name, thetas):
    """Print theta (``thetas[0]``) and theta_0 (``thetas[1]``) for ``algo_name``.

    Despite the name, nothing is plotted here: the plotting call is disabled.
    """
    theta_text = ', '.join(str(value) for value in thetas[0])
    print('theta for', algo_name, 'is', theta_text)
    print('theta_0 for', algo_name, 'is', thetas[1])
    # Plotting disabled:
    # utils.plot_toy_data(algo_name, toy_features, toy_labels, thetas)


plot_toy_results('Perceptron', thetas_perceptron)
plot_toy_results('Average Perceptron', thetas_avg_perceptron)
plot_toy_results('Pegasos', thetas_pegasos)

#-------------------------------------------------------------------------------
# Problem 7
#-------------------------------------------------------------------------------
#utils.plot_tune_results('Avg Perceptron', 'T', Ts, *avg_pct_tune_results)
#utils.plot_tune_results('Pegasos', 'T', Ts, *peg_tune_results_T)
#utils.plot_tune_results('Pegasos', 'L', Ls, *peg_tune_results_L)

#-------------------------------------------------------------------------------
# Use the best method (perceptron, average perceptron or Pegasos) along with
# the optimal hyperparameters according to validation accuracies to test
# against the test dataset. The test data has been provided as
# test_bow_features and test_labels.
#-------------------------------------------------------------------------------

L = 0.01
T = 25
#theta, theta_0 = p1.pegasos(train_bow_features, train_labels, T, L)
#labels = p1.classify(test_bow_features, theta, theta_0)
#kwargs={'T':T,'L': L}
#acc = p1.classifier_accuracy(p1.pegasos, test_bow_features, test_bow_features,  labels, test_labels, **kwargs)
#print(acc)
#-------------------------------------------------------------------------------
# Assign to best_theta, the weights (and not the bias!) learned by your most
# accurate algorithm with the optimal choice of hyperparameters.
#-------------------------------------------------------------------------------

# The weights go to best_theta; the bias is unpacked separately as theta_0.
best_theta, theta_0 = p1.pegasos(train_bow_features, train_labels, T, L)
# Vocabulary ordered by the index stored for each word in `dictionary`.
wordlist = [
    word for _, word in sorted((idx, word) for word, idx in dictionary.items())
]
sorted_word_features = utils.most_explanatory_word(best_theta, wordlist)
print("Most Explanatory Word Features")
print(sorted_word_features[:10])
예제 #10
0
# Feature matrices for the three text collections.
train_bow_features, val_bow_features, test_bow_features = [
    p1.extract_bow_feature_vectors(texts, dictionary)
    for texts in (train_texts, val_texts, test_texts)
]

#-------------------------------------------------------------------------------
# Problem 5
#-------------------------------------------------------------------------------

# Keep the loaded object as toy_data and unpack it into its two parts.
toy_data = utils.load_toy_data('toy_data.tsv')
toy_features, toy_labels = toy_data

T, L = 10, 0.2

thetas_perceptron = p1.perceptron(toy_features, toy_labels, T)
thetas_avg_perceptron = p1.average_perceptron(toy_features, toy_labels, T)
thetas_pegasos = p1.pegasos(toy_features, toy_labels, T, L)

def plot_toy_results(algo_name, thetas):
    """Report ``thetas`` for one algorithm on stdout, then plot the toy data."""
    # thetas[0] is iterated and its entries joined into one comma-separated string.
    weights_as_text = [str(weight) for weight in thetas[0]]
    print('theta for', algo_name, 'is', ', '.join(weights_as_text))
    print('theta_0 for', algo_name, 'is', thetas[1])
    utils.plot_toy_data(algo_name, toy_features, toy_labels, thetas)

plot_toy_results('Perceptron', thetas_perceptron)
plot_toy_results('Average Perceptron', thetas_avg_perceptron)
plot_toy_results('Pegasos', thetas_pegasos)

#-------------------------------------------------------------------------------
# Problem 7
#-------------------------------------------------------------------------------

T = 10
예제 #11
0
# -------------------------------------------------------------------------------
# Use the best method (perceptron, average perceptron or Pegasos) along with
# the optimal hyperparameters according to validation accuracies to test
# against the test dataset. The test data has been provided as
# test_bow_features and test_labels.
# -------------------------------------------------------------------------------

# Your code here
# Data tuple unpacked into utils.tune_pegasos_L below; note that it carries
# the *test* features and labels alongside the training ones.
data = (train_bow_features, train_labels, test_bow_features, test_labels)
fix_T = 25
# A single candidate value, so the "tuning" call evaluates one configuration.
Ls = [0.01]
peg_tune_results_L = utils.tune_pegasos_L(fix_T, Ls, *data)
# Element [1] of the result is paired with Ls and its maximum is reported.
# NOTE(review): the label says "valid" although the test split was passed in.
print('Pegasos valid: tune L', list(zip(Ls, peg_tune_results_L[1])))
print('best = {:.4f}, L={:.4f}'.format(np.max(peg_tune_results_L[1]),
                                       Ls[np.argmax(peg_tune_results_L[1])]))

# -------------------------------------------------------------------------------
# Assign to best_theta, the weights (and not the bias!) learned by your most
# accurate algorithm with the optimal choice of hyperparameters.
# -------------------------------------------------------------------------------

# Learn the final weights from the *training* split. Fitting on the test split
# would leak the held-out data into the model and rank words by test-set
# statistics instead of by what the trained classifier learned.
best_theta = p1.pegasos(train_bow_features, train_labels, 25, 0.01)[0]
# Vocabulary ordered by the index stored for each word in `dictionary`.
wordlist = [
    word for (idx, word) in sorted(zip(dictionary.values(), dictionary.keys()))
]
sorted_word_features = utils.most_explanatory_word(best_theta, wordlist)
print("Most Explanatory Word Features")
print(sorted_word_features[:10])
예제 #12
0
# against the test dataset. The test data has been provided as
# test_bow_features and test_labels.
#-------------------------------------------------------------------------------

# Tuned Pegasos hyperparameters.
peg_best_T, peg_best_L = 25, 0.01

# Evaluate with the training split for fitting and the test split for scoring.
peg_train_accuracy, peg_test_accuracy = p1.classifier_accuracy(
    p1.pegasos,
    train_bow_features,
    test_bow_features,
    train_labels,
    test_labels,
    T=peg_best_T,
    L=peg_best_L,
)

print(peg_test_accuracy)

# Train again to obtain the parameters themselves and the test predictions.
peg_theta, peg_theta_0 = p1.pegasos(
    train_bow_features, train_labels, peg_best_T, peg_best_L
)
print(peg_theta, peg_theta_0)
peg_test_preds = p1.classify(test_bow_features, peg_theta, peg_theta_0)

#-------------------------------------------------------------------------------
# Assign to best_theta, the weights (and not the bias!) learned by your most
# accurate algorithm with the optimal choice of hyperparameters.
#-------------------------------------------------------------------------------

# Rank the words using the Pegasos weights only (the bias is left out).
best_theta = peg_theta
wordlist = [
    word for _, word in sorted((idx, word) for word, idx in dictionary.items())
]
sorted_word_features = utils.most_explanatory_word(best_theta, wordlist)
print("Most Explanatory Word Features")
print(sorted_word_features[:10])
# Feature matrices for the validation and test texts.
val_bow_features, test_bow_features = (
    p1.extract_bow_feature_vectors(texts, dictionary)
    for texts in (val_texts, test_texts)
)

#print(train_bow_features)

#-------------------------------------------------------------------------------
# Problem 5
#-------------------------------------------------------------------------------

toy_data = utils.load_toy_data('toy_data.tsv')
toy_features, toy_labels = toy_data
T, L = 10, 0.2

# Same toy data and T for every algorithm; only Pegasos receives L.
thetas_perceptron = p1.perceptron(toy_features, toy_labels, T)
thetas_avg_perceptron = p1.average_perceptron(toy_features, toy_labels, T)
thetas_pegasos = p1.pegasos(toy_features, toy_labels, T, L)

def plot_toy_results(algo_name, thetas):
    """Print theta and theta_0 from ``thetas`` and pass them on for plotting.

    The printed theta line lists the entries of ``thetas[0]`` separated by
    ``', '``; the theta_0 line shows ``thetas[1]``.
    """
    theta_text = ', '.join(str(entry) for entry in thetas[0])
    print('theta for', algo_name, 'is', theta_text)
    print('theta_0 for', algo_name, 'is', thetas[1])
    utils.plot_toy_data(algo_name, toy_features, toy_labels, thetas)

plot_toy_results('Perceptron', thetas_perceptron)
plot_toy_results('Average Perceptron', thetas_avg_perceptron)
plot_toy_results('Pegasos', thetas_pegasos)

#-------------------------------------------------------------------------------
# Problem 7
#-------------------------------------------------------------------------------

T = 10
예제 #14
0
#
# utils.plot_tune_results('Perceptron', 'T', Ts, *pct_tune_results)
# utils.plot_tune_results('Avg Perceptron', 'T', Ts, *avg_pct_tune_results)
# utils.plot_tune_results('Pegasos', 'T', Ts, *peg_tune_results_T)
# utils.plot_tune_results('Pegasos', 'L', Ls, *peg_tune_results_L)

#-------------------------------------------------------------------------------
# Use the best method (perceptron, average perceptron or Pegasos) along with
# the optimal hyperparameters according to validation accuracies to test
# against the test dataset. The test data has been provided as
# test_bow_features and test_labels.
#-------------------------------------------------------------------------------
        
# Hyperparameters selected for the final Pegasos run.
T = 25
L = 0.01
# Fit on the training count features and score on the test count features.
avg_peg_train_accuracy, avg_peg_test_accuracy = p1.classifier_accuracy(
    p1.pegasos,
    train_bow_count_features,
    test_bow_count_features,
    train_labels,
    test_labels,
    T=T,
    L=L,
)
print("{:50} {:.4f}".format("Training accuracy for Pegasos:", avg_peg_train_accuracy))
# Label fixed: the value printed is the test accuracy, not a validation accuracy.
print("{:50} {:.4f}".format("Test accuracy for Pegasos:", avg_peg_test_accuracy))

#-------------------------------------------------------------------------------
# Assign to best_theta, the weights (and not the bias!) learned by your most
# accurate algorithm with the optimal choice of hyperparameters.
#-------------------------------------------------------------------------------
# Train on the count features; only theta (not theta_0) is used for the ranking.
theta, theta_0 = p1.pegasos(train_bow_count_features, train_labels, T, L)
best_theta = theta
wordlist = [
    word for _, word in sorted((idx, word) for word, idx in dictionary.items())
]
sorted_word_features = utils.most_explanatory_word(best_theta, wordlist)
print("Most Explanatory Word Features")
print(sorted_word_features[:10])
예제 #15
0
# Use the best method (perceptron, average perceptron or Pegasos) along with
# the optimal hyperparameters according to validation accuracies to test
# against the test dataset. The test data has been provided as
# test_bow_features and test_labels.
#-------------------------------------------------------------------------------

# Tuned hyperparameters, kept in a dict so they can be splatted as keywords.
hyper_param = {'T': 25, 'L': 0.01}
accuracy_train, accuracy_test = p1.classifier_accuracy(
    p1.pegasos,
    train_bow_features,
    test_bow_features,
    train_labels,
    test_labels,
    **hyper_param
)

print("accuracy on test set= {:.4f} ".format(accuracy_test))
#-------------------------------------------------------------------------------
# Assign to best_theta, the weights (and not the bias!) learned by your most
# accurate algorithm with the optimal choice of hyperparameters.
#-------------------------------------------------------------------------------

# Element [0] of the Pegasos result is kept as the weight vector.
best_theta = p1.pegasos(
    train_bow_features,
    train_labels,
    hyper_param.get('T'),
    hyper_param.get('L'),
)[0]
wordlist = [
    word for _, word in sorted((idx, word) for word, idx in dictionary.items())
]
sorted_word_features = utils.most_explanatory_word(best_theta, wordlist)
# Show both ends of the ranking: the first ten and the last ten entries.
print("Most Explanatory Word Features")
print(sorted_word_features[:10])
print("Most (negative) Explanatory Word Features")
print(sorted_word_features[-10:])

#-------------------------------------------------------------------------------
# Removing stopwords and using the Pegasos algortihm with the optimised
# hyperparameters.
#-------------------------------------------------------------------------------
예제 #16
0
# utils.plot_tune_results('Pegasos', 'L', Ls, *peg_tune_results_L)

#-------------------------------------------------------------------------------
# Use the best method (perceptron, average perceptron or Pegasos) along with
# the optimal hyperparameters according to validation accuracies to test
# against the test dataset. The test data has been provided as
# test_bow_features and test_labels.
#-------------------------------------------------------------------------------

# Your code here
T, L = 25, 0.01
# Fit on the training split and score on the test split.
test_peg_train_accuracy, test_peg_val_accuracy = p1.classifier_accuracy(
    p1.pegasos,
    train_bow_features,
    test_bow_features,
    train_labels,
    test_labels,
    T=T,
    L=L,
)

print(test_peg_val_accuracy)

#-------------------------------------------------------------------------------
# Assign to best_theta, the weights (and not the bias!) learned by your most
# accurate algorithm with the optimal choice of hyperparameters.
#-------------------------------------------------------------------------------

# Keep the weights; the second returned value is discarded into `_`.
best_theta, _ = p1.pegasos(train_bow_features, train_labels, T, L)
wordlist = [
    word for _idx, word in sorted((idx, word) for word, idx in dictionary.items())
]
sorted_word_features = utils.most_explanatory_word(best_theta, wordlist)
print("Most Explanatory Word Features")
print(sorted_word_features[:10])
예제 #17
0
# Build the feature matrix of every split from the shared dictionary.
train_bow_features, val_bow_features, test_bow_features = (
    p1.extract_bow_feature_vectors(texts, dictionary)
    for texts in (train_texts, val_texts, test_texts)
)

#-------------------------------------------------------------------------------
# Problem 5
#-------------------------------------------------------------------------------

toy_data = utils.load_toy_data('toy_data.tsv')
toy_features, toy_labels = toy_data

T, L = 200, 0.2

thetas_perceptron = p1.perceptron(toy_features, toy_labels, T)
thetas_avg_perceptron = p1.average_perceptron(toy_features, toy_labels, T)
thetas_pegasos = p1.pegasos(toy_features, toy_labels, T, L)


def plot_toy_results(algo_name, thetas):
    """Print the two parts of ``thetas`` for ``algo_name`` and plot the toy data."""
    # First part: every entry of thetas[0], comma separated.
    entries = [str(entry) for entry in thetas[0]]
    print('theta for', algo_name, 'is', ', '.join(entries))
    # Second part: thetas[1] as a single value.
    print('theta_0 for', algo_name, 'is', thetas[1])
    utils.plot_toy_data(algo_name, toy_features, toy_labels, thetas)


#plot_toy_results('Perceptron', thetas_perceptron)
#plot_toy_results('Average Perceptron', thetas_avg_perceptron)
#plot_toy_results('Pegasos', thetas_pegasos)

#-------------------------------------------------------------------------------
# Problem 7
#-------------------------------------------------------------------------------
예제 #18
0
# utils.plot_tune_results('Avg Perceptron', 'T', Ts, *avg_pct_tune_results)
# utils.plot_tune_results('Pegasos', 'T', Ts, *peg_tune_results_T)
# utils.plot_tune_results('Pegasos', 'L', Ls, *peg_tune_results_L)

#-------------------------------------------------------------------------------
# Use the best method (perceptron, average perceptron or Pegasos) along with
# the optimal hyperparameters according to validation accuracies to test
# against the test dataset. The test data has been provided as
# test_bow_features and test_labels.
#-------------------------------------------------------------------------------

# Your code here
# Tuned hyperparameters for the final run.
L_best, T_best = 0.01, 25
theta, theta_0 = p1.pegasos(
    train_bow_features, train_labels, T=T_best, L=L_best
)
# Classify the test split and print the resulting accuracy.
labels_hat = p1.classify(test_bow_features, theta, theta_0)
print(p1.accuracy(labels_hat, test_labels))
#-------------------------------------------------------------------------------
# Assign to best_theta, the weights (and not the bias!) learned by your most
# accurate algorithm with the optimal choice of hyperparameters.
#-------------------------------------------------------------------------------

# Rank the words with the learned weights only (the bias is left out).
best_theta = theta
# Vocabulary ordered by the index stored for each word in `dictionary`.
wordlist = [
    word for (idx, word) in sorted(zip(dictionary.values(), dictionary.keys()))
]
sorted_word_features = utils.most_explanatory_word(best_theta, wordlist)
print("Most Explanatory Word Features")
# Print the head of the ranking so the output matches its header; the
# previous [-10:] slice showed the tail of the list under the "Most" label.
print(sorted_word_features[:10])
예제 #19
0
# Bag-of-words feature matrices for the train / validation / test texts.
train_bow_features, val_bow_features, test_bow_features = [
    p1.extract_bow_feature_vectors(texts, dictionary)
    for texts in (train_texts, val_texts, test_texts)
]

# -----------------------------------------------------------------------------
# Problem 5
# -----------------------------------------------------------------------------

toy_data = utils.load_toy_data('toy_data.tsv')
toy_features, toy_labels = toy_data

T, L = 10, 0.2

# Train the three algorithms on identical data.
thetas_perceptron = p1.perceptron(toy_features, toy_labels, T)
thetas_avg_perceptron = p1.average_perceptron(toy_features, toy_labels, T)
thetas_pegasos = p1.pegasos(toy_features, toy_labels, T, L)


def plot_toy_results(algo_name, thetas):
    """Print theta / theta_0 for ``algo_name`` and forward ``thetas`` for plotting.

    Reads the module-level ``toy_features`` and ``toy_labels`` when calling
    ``utils.plot_toy_data``.
    """
    theta_text = ', '.join(str(value) for value in thetas[0])
    print('theta for', algo_name, 'is', theta_text)
    print('theta_0 for', algo_name, 'is', thetas[1])
    utils.plot_toy_data(algo_name, toy_features, toy_labels, thetas)


#plot_toy_results('Perceptron', thetas_perceptron)
#plot_toy_results('Average Perceptron', thetas_avg_perceptron)
#plot_toy_results('Pegasos', thetas_pegasos)

## Convergence check

#for t in range(2, 20000, 1000):
# The dictionary is built from the training texts only.
dictionary = p1.bag_of_words(train_texts)

# Every split is then encoded with that same dictionary.
train_bow_features, val_bow_features, test_bow_features = [
    p1.extract_bow_feature_vectors(texts, dictionary)
    for texts in (train_texts, val_texts, test_texts)
]

toy_data = utils.load_toy_data('toy_data.tsv')
toy_features, toy_labels = toy_data

T, L = 500, 0.2

# Find theta and theta_0 for each of the algorithms.
thetas_perceptron = p1.perceptron(toy_features, toy_labels, T)
thetas_avg_perceptron = p1.average_perceptron(toy_features, toy_labels, T)
thetas_pegasos = p1.pegasos(toy_features, toy_labels, T, L)


def plot_toy_results(algo_name, thetas):
    """Show the result of one algorithm: print its parameters, then plot them."""
    # Entries of thetas[0] are stringified one by one and joined with ', '.
    weights_as_text = [str(weight) for weight in thetas[0]]
    print('theta for', algo_name, 'is', ', '.join(weights_as_text))
    print('theta_0 for', algo_name, 'is', thetas[1])
    utils.plot_toy_data(algo_name, toy_features, toy_labels, thetas)


plot_toy_results('Perceptron', thetas_perceptron)
plot_toy_results('Average Perceptron', thetas_avg_perceptron)
plot_toy_results('Pegasos', thetas_pegasos)

T = 10
L = 0.01
예제 #21
0
#
# utils.plot_tune_results('Perceptron', 'T', Ts, *pct_tune_results)
# utils.plot_tune_results('Avg Perceptron', 'T', Ts, *avg_pct_tune_results)
# utils.plot_tune_results('Pegasos', 'T', Ts, *peg_tune_results_T)
# utils.plot_tune_results('Pegasos', 'L', Ls, *peg_tune_results_L)

#-------------------------------------------------------------------------------
# Use the best method (perceptron, average perceptron or Pegasos) along with
# the optimal hyperparameters according to validation accuracies to test
# against the test dataset. The test data has been provided as
# test_bow_features and test_labels.
#-------------------------------------------------------------------------------

# Your code here

# Parameters learned with the tuned settings (T=25, L=0.01).
theta, theta_0 = p1.pegasos(train_bow_features, train_labels, 25, 0.01)

# Evaluate the same configuration: fit on the training split, score on test.
acc = p1.classifier_accuracy(
    p1.pegasos,
    train_bow_features,
    test_bow_features,
    train_labels,
    test_labels,
    T=25,
    L=0.01,
)
print(acc)
#-------------------------------------------------------------------------------
# Assign to best_theta, the weights (and not the bias!) learned by your most
# accurate algorithm with the optimal choice of hyperparameters.
#-------------------------------------------------------------------------------

best_theta = theta  # Your code here
예제 #22
0
파일: main.py 프로젝트: hanhsien/6.86x
# Feature matrices for the three text collections.
train_bow_features, val_bow_features, test_bow_features = (
    p1.extract_bow_feature_vectors(texts, dictionary)
    for texts in (train_texts, val_texts, test_texts)
)

#-------------------------------------------------------------------------------
# Problem 5
#-------------------------------------------------------------------------------

toy_data = utils.load_toy_data('toy_data.tsv')
toy_features, toy_labels = toy_data

T, L = 10, 0.2

thetas_perceptron = p1.perceptron(toy_features, toy_labels, T)
thetas_avg_perceptron = p1.average_perceptron(toy_features, toy_labels, T)
thetas_pegasos = p1.pegasos(toy_features, toy_labels, T, L)

def plot_toy_results(algo_name, thetas):
    """Print ``thetas[0]`` and ``thetas[1]`` for ``algo_name``, then plot.

    The plot call receives the algorithm name, the module-level toy features
    and labels, and the unmodified ``thetas`` object.
    """
    theta_text = ', '.join(str(entry) for entry in thetas[0])
    print('theta for', algo_name, 'is', theta_text)
    print('theta_0 for', algo_name, 'is', thetas[1])
    utils.plot_toy_data(algo_name, toy_features, toy_labels, thetas)

plot_toy_results('Perceptron', thetas_perceptron)
plot_toy_results('Average Perceptron', thetas_avg_perceptron)
plot_toy_results('Pegasos', thetas_pegasos)

#-------------------------------------------------------------------------------
# Problem 7
#-------------------------------------------------------------------------------
# Encode every split with the shared dictionary.
train_bow_features, val_bow_features, test_bow_features = [
    p1.extract_bow_feature_vectors(texts, dictionary)
    for texts in (train_texts, val_texts, test_texts)
]

#-------------------------------------------------------------------------------
# Problem 5
#-------------------------------------------------------------------------------

toy_data = utils.load_toy_data('toy_data.tsv')
toy_features, toy_labels = toy_data

T, L = 10, 0.2

# Fit all three algorithms on the toy data.
thetas_perceptron = p1.perceptron(toy_features, toy_labels, T)
thetas_avg_perceptron = p1.average_perceptron(toy_features, toy_labels, T)
thetas_pegasos = p1.pegasos(toy_features, toy_labels, T, L)


#
def plot_toy_results(algo_name, thetas):
    """Print the learned parameters for ``algo_name`` and plot the toy data."""
    # thetas[0] -> comma-separated theta line, thetas[1] -> theta_0 line.
    entries = [str(entry) for entry in thetas[0]]
    print('theta for', algo_name, 'is', ', '.join(entries))
    print('theta_0 for', algo_name, 'is', thetas[1])
    utils.plot_toy_data(algo_name, toy_features, toy_labels, thetas)


#
plot_toy_results('Perceptron', thetas_perceptron)
plot_toy_results('Average Perceptron', thetas_avg_perceptron)
plot_toy_results('Pegasos', thetas_pegasos)

#-------------------------------------------------------------------------------
예제 #24
0
# Pair every tried L with the matching entry of peg_tune_results_L[1], then
# report the largest entry and the L at that position.
print('Pegasos valid: tune L', list(zip(Ls, peg_tune_results_L[1])))
print('best = {:.4f}, L={:.4f}'.format(np.max(peg_tune_results_L[1]),
                                       Ls[np.argmax(peg_tune_results_L[1])]))

# One tuning plot per (algorithm, tuned parameter) pair; each result tuple is
# unpacked into the trailing positional arguments of plot_tune_results.
utils.plot_tune_results('Perceptron', 'T', Ts, *pct_tune_results)
utils.plot_tune_results('Avg Perceptron', 'T', Ts, *avg_pct_tune_results)
utils.plot_tune_results('Pegasos', 'T', Ts, *peg_tune_results_T)
utils.plot_tune_results('Pegasos', 'L', Ls, *peg_tune_results_L)

#-------------------------------------------------------------------------------
# Use the best method (perceptron, average perceptron or Pegasos) along with
# the optimal hyperparameters according to validation accuracies to test
# against the test dataset.
#-------------------------------------------------------------------------------

# Train with the chosen hyperparameters (T=25, L=0.01) on the training split.
(test_theta, test_theta_0) = p1.pegasos(train_bow_features, train_labels, 25,
                                        0.01)
# Size of the test feature matrix along axis 0; not used in the visible code
# but kept because later code may read it.
n = np.size(test_bow_features, 0)
z = p1.classify(test_bow_features, test_theta, test_theta_0)
# NOTE: despite its name, `train_error` holds the accuracy of the predictions
# on the *test* labels. The name is kept because later code may read it.
train_error = p1.accuracy(z, test_labels)
# Message fixed: the number printed is the test accuracy, not a training one.
print("Test accuracy is: ", train_error)
# #-------------------------------------------------------------------------------
# # Assign to best_theta, the weights (and not the bias!) learned by your most
# # accurate algorithm with the optimal choice of hyperparameters.
# #-------------------------------------------------------------------------------

# Use the weights (not the bias) learned above for the word ranking.
best_theta = test_theta
wordlist = [
    word for _, word in sorted((idx, word) for word, idx in dictionary.items())
]
sorted_word_features = utils.most_explanatory_word(best_theta, wordlist)
print("Most Explanatory Word Features")
# Bag-of-words feature matrices for the three data splits.
train_bow_features, val_bow_features, test_bow_features = (
    p1.extract_bow_feature_vectors(texts, dictionary)
    for texts in (train_texts, val_texts, test_texts)
)

#-------------------------------------------------------------------------------
# Problem 5
#-------------------------------------------------------------------------------

toy_data = utils.load_toy_data('toy_data.tsv')
toy_features, toy_labels = toy_data

T, L = 10, 0.2

thetas_perceptron = p1.perceptron(toy_features, toy_labels, T)
thetas_avg_perceptron = p1.average_perceptron(toy_features, toy_labels, T)
thetas_pegasos = p1.pegasos(toy_features, toy_labels, T, L)


def plot_toy_results(algo_name, thetas):
    """Print the parameters in ``thetas`` and plot them on the toy data.

    ``thetas[0]`` is printed as a comma-separated list of its elements and
    ``thetas[1]`` as a single value. ``thetas`` is then passed unchanged to
    ``utils.plot_toy_data`` together with the module-level ``toy_features``
    and ``toy_labels``.

    Args:
        algo_name: Label used in the printed lines and passed to the plot.
        thetas: Indexable pair; element 0 must be iterable.
    """
    theta_text = ', '.join(str(component) for component in thetas[0])
    print('theta for', algo_name, 'is', theta_text)
    print('theta_0 for', algo_name, 'is', str(thetas[1]))
    utils.plot_toy_data(algo_name, toy_features, toy_labels, thetas)


# Report and plot the result of each learner, in the order they were trained.
for _algo_name, _algo_thetas in (
    ('Perceptron', thetas_perceptron),
    ('Average Perceptron', thetas_avg_perceptron),
    ('Pegasos', thetas_pegasos),
):
    plot_toy_results(_algo_name, _algo_thetas)

#-------------------------------------------------------------------------------
# Problem 7
#-------------------------------------------------------------------------------
예제 #26
0
#-------------------------------------------------------------------------------
# Use the best method (perceptron, average perceptron or Pegasos) along with
# the optimal hyperparameters according to validation accuracies to test
# against the test dataset. The test data has been provided as
# test_bow_features and test_labels.
#-------------------------------------------------------------------------------

# Your code here
# Evaluate Pegasos through p1.classifier_accuracy, passing the training data
# and the test data; the hyperparameters are forwarded as keyword arguments.
pegasos_hyperparams = {'T': 25, 'L': 0.0100}
test_acc = p1.classifier_accuracy(
    p1.pegasos,
    train_bow_features,
    test_bow_features,
    train_labels,
    test_labels,
    **pegasos_hyperparams,
)
print(test_acc)

#-------------------------------------------------------------------------------
# Assign to best_theta, the weights (and not the bias!) learned by your most
# accurate algorithm with the optimal choice of hyperparameters.
#-------------------------------------------------------------------------------

# Train Pegasos on the training data and keep only element 0 of its result
# (the weights, not the bias).
pegasos_result = p1.pegasos(train_bow_features, train_labels, T=25, L=0.0100)
best_theta = pegasos_result[0]

# Sort the (value, key) pairs of `dictionary` and keep the keys in that order.
wordlist = [
    key_word
    for (_value, key_word) in sorted(
        (value, key) for (key, value) in dictionary.items()
    )
]
sorted_word_features = utils.most_explanatory_word(best_theta, wordlist)

print("Most Explanatory Word Features")
top_word_features = sorted_word_features[:10]
print(top_word_features)
예제 #27
0
 def train_fn(features, labels, L):
     """Run ``p1.pegasos`` with ``best_T`` from the enclosing scope.

     Only ``L`` varies per call; ``best_T`` is read from outside this
     function. Returns whatever ``p1.pegasos`` returns.
     """
     trained = p1.pegasos(features, labels, best_T, L)
     return trained
예제 #28
0
# peg_tune_results_L = utils.tune_pegasos_L(fix_T, Ls, *data)
# print('Pegasos valid: tune L', list(zip(Ls, peg_tune_results_L[1])))
# print('best = {:.4f}, L={:.4f}'.format(np.max(peg_tune_results_L[1]), Ls[np.argmax(peg_tune_results_L[1])]))

# utils.plot_tune_results('Perceptron', 'T', Ts, *pct_tune_results)
# utils.plot_tune_results('Avg Perceptron', 'T', Ts, *avg_pct_tune_results)
# utils.plot_tune_results('Pegasos', 'T', Ts, *peg_tune_results_T)
# utils.plot_tune_results('Pegasos', 'L', Ls, *peg_tune_results_L)

#-------------------------------------------------------------------------------
# Use the best method (perceptron, average perceptron or Pegasos) along with
# the optimal hyperparameters according to validation accuracies to test
# against the test dataset. The test data has been provided as
# test_bow_features and test_labels.
#-------------------------------------------------------------------------------

# Train Pegasos on the training data, classify the test features with the
# learned parameters, and print the accuracy against test_labels.
pegasos_result = p1.pegasos(train_bow_features, train_labels, 25, 0.01)
t, t0 = pegasos_result
y_pred = p1.classify(test_bow_features, t, t0)
test_accuracy = p1.accuracy(y_pred, test_labels)
print(test_accuracy)

#-------------------------------------------------------------------------------
# Assign to best_theta, the weights (and not the bias!) learned by your most
# accurate algorithm with the optimal choice of hyperparameters.
#-------------------------------------------------------------------------------

# best_theta = t
# wordlist   = [word for (idx, word) in sorted(zip(dictionary.values(), dictionary.keys()))]
# sorted_word_features = utils.most_explanatory_word(best_theta, wordlist)
# print("Most Explanatory Word Features")
# print(sorted_word_features[:10])
예제 #29
0
# Use the best method (perceptron, average perceptron or Pegasos) along with
# the optimal hyperparameters according to validation accuracies to test
# against the test dataset. The test data has been provided as
# test_bow_features and test_labels.
#-------------------------------------------------------------------------------

# Evaluate Pegasos (T=25, L=0.01) through p1.classifier_accuracy on the
# training and test data, then print whatever it returns.
accuracy_result = p1.classifier_accuracy(
    p1.pegasos,
    train_bow_features,
    test_bow_features,
    train_labels,
    test_labels,
    T=25,
    L=0.01,
)
print(accuracy_result)

#-------------------------------------------------------------------------------
# Assign to best_theta, the weights (and not the bias!) learned by your most
# accurate algorithm with the optimal choice of hyperparameters.
#-------------------------------------------------------------------------------

# Train Pegasos on the training data; the weights go to best_theta and the
# second returned value to best_theta_0.
best_theta, best_theta_0 = p1.pegasos(
    train_bow_features, train_labels, T=25, L=0.01
)

# Sort the (value, key) pairs of `dictionary` and keep the keys in that order.
ordered_pairs = sorted((value, key) for (key, value) in dictionary.items())
wordlist = [pair[1] for pair in ordered_pairs]

sorted_word_features = utils.most_explanatory_word(best_theta, wordlist)
print("Most Explanatory Word Features")
top_word_features = sorted_word_features[:10]
print(top_word_features)
예제 #30
0
import project1 as p1
from project1 import perceptron, average_perceptron, pegasos
import utils
import numpy as np
import numpy.testing as npt
import re

# Load the toy dataset. toy_data keeps the full return value, which is also
# unpacked into its two components.
toy_data = utils.load_toy_data('toy_data.tsv')
toy_features, toy_labels = toy_data
T, L = 10, 0.2

# For each learner in turn: train on the toy data, print what it returns, and
# plot it. The last tuple element holds the hyperparameters passed after the
# data (T for all three learners, plus L for Pegasos).
for _title, _learner, _hyperparams in (
    ("Perceptron", perceptron, (T,)),
    ("Average Perceptron", average_perceptron, (T,)),
    ("Pegasos", pegasos, (T, L)),
):
    thetas = _learner(toy_features, toy_labels, *_hyperparams)
    print(thetas)
    utils.plot_toy_data(_title, toy_features, toy_labels, thetas)