def check_extract_bow_feature_vectors():
    """Check ``p1.extract_bow_feature_vectors`` on a two-sentence toy corpus.

    Builds a four-word dictionary, extracts features for two short texts and
    reports the outcome through ``log``:

    * FAIL if the function raises ``NotImplementedError``, does not return a
      ``numpy.ndarray``, or returns an array whose shape differs from the
      expected one.
    * PASS once type and shape are correct, followed by
      WARN if the matrix equals the binary-indicator encoding,
      PASS if it equals the word-count encoding,
      FAIL for any other matrix.

    Returns:
        None. Results are reported only through ``log``.
    """
    ex_name = "Extract bow feature vectors"
    texts = [
        "He loves her ",
        "He really really loves her",
    ]
    keys = ["he", "loves", "her", "really"]
    dictionary = {k: i for i, k in enumerate(keys)}
    # Binary-indicator encoding: 1 wherever the word occurs at all.
    exp_res = np.array([[1, 1, 1, 0],
                        [1, 1, 1, 1]])
    # Count encoding: "really" occurs twice in the second text.
    non_bin_res = np.array([[1, 1, 1, 0],
                            [1, 1, 1, 2]])

    try:
        res = p1.extract_bow_feature_vectors(texts, dictionary)
    except NotImplementedError:
        log(red("FAIL"), ex_name, ": not implemented")
        return

    if not isinstance(res, np.ndarray):
        log(red("FAIL"), ex_name, ": does not return a numpy array, type: ", type(res))
        return
    # Compare the complete shape, not just the number of rows: a result with
    # the wrong number of columns must be reported here rather than blowing up
    # (or silently mis-comparing) in the element-wise checks below.
    if res.shape != exp_res.shape:
        log(red("FAIL"), ex_name, ": expected an array of shape ", exp_res.shape, " but got array of shape", res.shape)
        return

    log(green("PASS"), ex_name)

    if np.array_equal(res, exp_res):
        log(yellow("WARN"), ex_name, ": uses binary indicators as features")
    elif np.array_equal(res, non_bin_res):
        log(green("PASS"), ex_name, ": correct non binary features")
    else:
        log(red("FAIL"), ex_name, ": unexpected feature matrix")
        return
#-------------------------------------------------------------------------------
# Load the three review splits in train / validation / test order.
train_data, val_data, test_data = (
    utils.load_data(path)
    for path in ('reviews_train.tsv', 'reviews_val.tsv', 'reviews_test.tsv')
)

# Separate every split into parallel tuples of review texts and sentiments.
train_texts, train_labels = zip(*[(row['text'], row['sentiment']) for row in train_data])
val_texts, val_labels = zip(*[(row['text'], row['sentiment']) for row in val_data])
test_texts, test_labels = zip(*[(row['text'], row['sentiment']) for row in test_data])

# The dictionary is built from the training texts only and then reused to
# featurize all three splits.
dictionary = p1.bag_of_words(train_texts)

train_bow_features, val_bow_features, test_bow_features = (
    p1.extract_bow_feature_vectors(texts, dictionary)
    for texts in (train_texts, val_texts, test_texts)
)

#-------------------------------------------------------------------------------
# Problem 5 (currently disabled; kept commented out exactly as found)
#-------------------------------------------------------------------------------

# toy_features, toy_labels = toy_data = utils.load_toy_data('toy_data.tsv')
#
# T = 200
# L = 0.2
#
# thetas_perceptron = p1.perceptron(toy_features, toy_labels, T)
# thetas_avg_perceptron = p1.average_perceptron(toy_features, toy_labels, T)
# thetas_pegasos = p1.pegasos(toy_features, toy_labels, T, L)
#-------------------------------------------------------------------------------
# Load the three review splits in train / validation / test order.
train_data, val_data, test_data = (
    utils.load_data(path)
    for path in (
        'data/reviews_train.tsv',
        'data/reviews_val.tsv',
        'data/reviews_test.tsv',
    )
)

# Separate every split into parallel tuples of review texts and sentiments.
train_texts, train_labels = zip(*[(row['text'], row['sentiment']) for row in train_data])
val_texts, val_labels = zip(*[(row['text'], row['sentiment']) for row in val_data])
test_texts, test_labels = zip(*[(row['text'], row['sentiment']) for row in test_data])

# The dictionary is built from the training texts only and then reused to
# featurize all three splits.
dictionary = p1.bag_of_words(train_texts)

train_bow_features, val_bow_features, test_bow_features = (
    p1.extract_bow_feature_vectors(texts, dictionary)
    for texts in (train_texts, val_texts, test_texts)
)

#-------------------------------------------------------------------------------
# Problem 5
#-------------------------------------------------------------------------------

# Keep the raw return value as toy_data and also unpack it into its two parts.
toy_data = utils.load_toy_data('data/toy_data.tsv')
toy_features, toy_labels = toy_data

# T is handed to all three trainers below; L only to pegasos.
T, L = 10, 0.2

thetas_perceptron = p1.perceptron(toy_features, toy_labels, T)
thetas_avg_perceptron = p1.average_perceptron(toy_features, toy_labels, T)
thetas_pegasos = p1.pegasos(toy_features, toy_labels, T, L)
import numpy as np

#-------------------------------------------------------------------------------
# Data loading. There is no need to edit code in this section.
#-------------------------------------------------------------------------------

train_data = utils.load_data('reviews_train.tsv')

# Read the whitespace-separated stop words. The context manager guarantees the
# file is closed even if reading fails.
with open('stopwords.txt') as stopwords_file:
    stopwords_data = stopwords_file.read().split()

train_texts, train_labels = zip(*((sample['text'], sample['sentiment']) for sample in train_data))

# Build the dictionary from the training texts with the stop words passed in,
# then featurize the training set with it.
dictionary = p1.bag_of_words(train_texts, stopwords_data)
train_bow_features = p1.extract_bow_feature_vectors(train_texts, dictionary)

# Arguments handed to p1.pegasos when training on the full training set.
T = 25
L = 0.01
thetas_pegasos = p1.pegasos(train_bow_features, train_labels, T, L)

# Interactive loop: classify reviews typed by the user until the input stream
# ends (EOF) or the user interrupts with Ctrl-C.
while True:
    try:
        input_texts = input('Input your review: ')
    except (EOFError, KeyboardInterrupt):
        # Finish the prompt line and leave the loop instead of dying with a
        # traceback.
        print()
        break

    # NOTE(review): the 'blah' placeholder is presumably there so that the
    # feature matrix has more than one row -- confirm against p1. The real
    # review is the last row, hence output[-1] below.
    input_bow_features = p1.extract_bow_feature_vectors(['blah', input_texts], dictionary)
    # NOTE(review): thetas_pegasos[0] / [1] are presumably (theta, theta_0) --
    # confirm against p1.pegasos.
    output = p1.classify(input_bow_features, thetas_pegasos[0], thetas_pegasos[1])

    print('_______________________________________________________________')
    if output[-1] == 1:
        print('This is a positive review!')
    else:
        print('This is a negative review')
#-------------------------------------------------------------------------------
# Data loading. There is no need to edit code in this section.
#-------------------------------------------------------------------------------

# Load the three review splits in train / validation / test order.
train_data, val_data, test_data = (
    utils.load_data(path)
    for path in ('reviews_train.tsv', 'reviews_val.tsv', 'reviews_test.tsv')
)

# Separate every split into parallel tuples of review texts and sentiments.
train_texts, train_labels = zip(*[(row['text'], row['sentiment']) for row in train_data])
val_texts, val_labels = zip(*[(row['text'], row['sentiment']) for row in val_data])
test_texts, test_labels = zip(*[(row['text'], row['sentiment']) for row in test_data])

# The dictionary is built from the training texts only and then reused to
# featurize all three splits.
dictionary = p1.bag_of_words(train_texts)

train_bow_features, val_bow_features, test_bow_features = (
    p1.extract_bow_feature_vectors(texts, dictionary)
    for texts in (train_texts, val_texts, test_texts)
)

#-------------------------------------------------------------------------------
# Section 1.7
#-------------------------------------------------------------------------------

# Keep the raw return value as toy_data and also unpack it into its two parts.
toy_data = utils.load_toy_data('toy_data.tsv')
toy_features, toy_labels = toy_data

# T is handed to all three trainers below; L only to the
# average passive-aggressive one.
T, L = 5, 10

thetas_perceptron = p1.perceptron(toy_features, toy_labels, T)
thetas_avg_perceptron = p1.average_perceptron(toy_features, toy_labels, T)
thetas_avg_pa = p1.average_passive_aggressive(toy_features, toy_labels, T, L)
#-------------------------------------------------------------------------------
# Load the three review splits in train / validation / test order.
train_data, val_data, test_data = (
    utils.load_data(path)
    for path in ('reviews_train.tsv', 'reviews_val.tsv', 'reviews_test.tsv')
)

# Separate every split into parallel tuples of review texts and sentiments.
train_texts, train_labels = zip(*[(row['text'], row['sentiment']) for row in train_data])
val_texts, val_labels = zip(*[(row['text'], row['sentiment']) for row in val_data])
test_texts, test_labels = zip(*[(row['text'], row['sentiment']) for row in test_data])

# The dictionary is built from the training texts only and then reused to
# featurize all three splits.
dictionary = p1.bag_of_words(train_texts)

train_bow_features, val_bow_features, test_bow_features = (
    p1.extract_bow_feature_vectors(texts, dictionary)
    for texts in (train_texts, val_texts, test_texts)
)

#-------------------------------------------------------------------------------
# Problem 5
#-------------------------------------------------------------------------------

# Keep the raw return value as toy_data and also unpack it into its two parts.
toy_data = utils.load_toy_data('toy_data.tsv')
toy_features, toy_labels = toy_data

# T is handed to all three trainers below; L only to pegasos.
T, L = 10, 0.2

thetas_perceptron = p1.perceptron(toy_features, toy_labels, T)
thetas_avg_perceptron = p1.average_perceptron(toy_features, toy_labels, T)
thetas_pegasos = p1.pegasos(toy_features, toy_labels, T, L)
# -------------------------------------------------------------------------------
# Data loading. There is no need to edit code in this section.
# -------------------------------------------------------------------------------

# Load the three review splits in train / validation / test order.
train_data, val_data, test_data = (
    utils.load_data(path)
    for path in ('reviews_train.tsv', 'reviews_val.tsv', 'reviews_test.tsv')
)

# Separate every split into parallel tuples of review texts and sentiments.
train_texts, train_labels = zip(*[(row['text'], row['sentiment']) for row in train_data])
val_texts, val_labels = zip(*[(row['text'], row['sentiment']) for row in val_data])
test_texts, test_labels = zip(*[(row['text'], row['sentiment']) for row in test_data])

# The dictionary is built from the training texts only; every split is then
# featurized with count=True.
dictionary = p1.bag_of_words(train_texts)

train_bow_features, val_bow_features, test_bow_features = (
    p1.extract_bow_feature_vectors(texts, dictionary, count=True)
    for texts in (train_texts, val_texts, test_texts)
)


class TestAlgoComparison(unittest.TestCase):
    # NOTE(review): only the beginning of this test case is visible here; the
    # statements below are reproduced unchanged.

    def test_algorithm_compare(self):
        # -------------------------------------------------------------------------------
        # # Problem 5
        # #-------------------------------------------------------------------------------

        # Load the toy data set, keeping both the raw return value and its
        # two unpacked parts.
        toy_features, toy_labels = toy_data = utils.load_toy_data('toy_data.tsv')

        T = 100
        L = 0.2