# Split every dataset into two parallel tuples: the review texts and their
# sentiment labels.
train_texts, train_labels = zip(
    *[(row['text'], row['sentiment']) for row in train_data]
)
val_texts, val_labels = zip(
    *[(row['text'], row['sentiment']) for row in val_data]
)
test_texts, test_labels = zip(
    *[(row['text'], row['sentiment']) for row in test_data]
)

# The dictionary is built from the training texts only and then reused to
# featurize all three splits.
dictionary = lab2.bag_of_words(train_texts)

train_bow_features = lab2.extract_bow_feature_vectors(
    train_texts, dictionary
)
val_bow_features = lab2.extract_bow_feature_vectors(
    val_texts, dictionary
)
test_bow_features = lab2.extract_bow_feature_vectors(
    test_texts, dictionary
)

# You may modify the following when adding additional features (Part 3c)
train_final_features = lab2.extract_final_features(
    train_texts, dictionary
)
val_final_features = lab2.extract_final_features(
    val_texts, dictionary
)
test_final_features = lab2.extract_final_features(
    test_texts, dictionary
)

#-------------------------------------------------------------------------------
# Part 1 - Perceptron Algorithm
#-------------------------------------------------------------------------------

# Left disabled, as in the original script; uncomment to run the toy example.
# toy_features, toy_labels = utils.load_toy_data('../../Data/toy_data.csv')
# theta, theta_0 = lab2.perceptron(toy_features, toy_labels, T=5)
# utils.plot_toy_results(toy_features, toy_labels, theta, theta_0)

#-------------------------------------------------------------------------------
# Part 2 - Classifying Reviews
# NOTE(review): this line opens mid-expression -- `for sample in val_data))` is the tail of a generator whose start is not visible here -- and its line breaks have been lost, so everything after the first `#` below is currently comment text. TODO: restore the original one-statement-per-line layout.
# NOTE(review): `stopwords` is passed to lab2.extract_final_features below but is not assigned anywhere in the visible text (only `stopwords=None` keyword arguments appear) -- confirm it is defined earlier in the file.
for sample in val_data)) test_texts, test_labels = zip(*((sample['text'], sample['sentiment']) for sample in test_data)) dictionary = lab2.bag_of_words(train_texts) train_bow_features = lab2.extract_bow_feature_vectors(train_texts, dictionary) val_bow_features = lab2.extract_bow_feature_vectors(val_texts, dictionary) test_bow_features = lab2.extract_bow_feature_vectors(test_texts, dictionary) # You may modify the following when adding additional features (Part 3c) #dictionary_final = lab2.bag_of_words_final(train_texts, stopwords=None) dictionary_final = lab2.bag_of_words_with_bigrams(train_texts, stopwords=None) train_final_features = lab2.extract_final_features(train_texts, dictionary_final, stopwords) val_final_features = lab2.extract_final_features(val_texts, dictionary_final, stopwords) test_final_features = lab2.extract_final_features(test_texts, dictionary_final, stopwords) #------------------------------------------------------------------------------- # Part 1 - Perceptron Algorithm #------------------------------------------------------------------------------- toy_features, toy_labels = utils.load_toy_data('../../Data/toy_data.csv') theta, theta_0 = lab2.perceptron(toy_features, toy_labels, T=5) utils.plot_toy_results(toy_features, toy_labels, theta, theta_0)