Ejemplo n.º 1
0
def _texts_and_labels(samples):
    """Return ``(texts, labels)`` tuples read from each sample's
    'text' and 'sentiment' entries."""
    pairs = [(entry['text'], entry['sentiment']) for entry in samples]
    # zip(*pairs) transposes [(text, label), ...] into (texts, labels).
    texts, labels = zip(*pairs)
    return texts, labels


train_texts, train_labels = _texts_and_labels(train_data)
val_texts, val_labels = _texts_and_labels(val_data)
test_texts, test_labels = _texts_and_labels(test_data)

# The dictionary is built from the training texts only and then reused
# for every split.
dictionary = lab2.bag_of_words(train_texts)

# Bag-of-words feature vectors for train / val / test, computed in that order.
train_bow_features, val_bow_features, test_bow_features = (
    lab2.extract_bow_feature_vectors(split_texts, dictionary)
    for split_texts in (train_texts, val_texts, test_texts)
)

# Part 3c: this is the section to modify when adding additional features.

# "Final" feature vectors for train / val / test, computed in that order
# against the shared dictionary.
train_final_features, val_final_features, test_final_features = [
    lab2.extract_final_features(split_texts, dictionary)
    for split_texts in (train_texts, val_texts, test_texts)
]

#-------------------------------------------------------------------------------
# Part 1 - Perceptron Algorithm
#-------------------------------------------------------------------------------

# toy_features, toy_labels = utils.load_toy_data('../../Data/toy_data.csv')

# theta, theta_0 = lab2.perceptron(toy_features, toy_labels, T=5)

# utils.plot_toy_results(toy_features, toy_labels, theta, theta_0)

#-------------------------------------------------------------------------------
# Part 2 - Classifying Reviews
Ejemplo n.º 2
0
                              for sample in val_data))
# Collect (text, sentiment) pairs from the test split, then transpose them
# into one tuple of texts and one tuple of labels.
test_pairs = [(entry['text'], entry['sentiment']) for entry in test_data]
test_texts, test_labels = zip(*test_pairs)

# The dictionary comes from the training texts only; validation and test
# texts are encoded against that same dictionary.
dictionary = lab2.bag_of_words(train_texts)

to_bow = lab2.extract_bow_feature_vectors
train_bow_features = to_bow(train_texts, dictionary)
val_bow_features = to_bow(val_texts, dictionary)
test_bow_features = to_bow(test_texts, dictionary)

# Part 3c: this is the section to modify when adding additional features.

# Alternative dictionary builder, kept for reference:
# dictionary_final = lab2.bag_of_words_final(train_texts, stopwords=None)
dictionary_final = lab2.bag_of_words_with_bigrams(train_texts, stopwords=None)

# NOTE(review): the dictionary above is built with stopwords=None, while the
# extraction calls below receive `stopwords` (defined outside this section)
# -- confirm the mismatch is intentional.
train_final_features, val_final_features, test_final_features = [
    lab2.extract_final_features(split_texts, dictionary_final, stopwords)
    for split_texts in (train_texts, val_texts, test_texts)
]

#-------------------------------------------------------------------------------
# Part 1 - Perceptron Algorithm
#-------------------------------------------------------------------------------

# Load the toy dataset, fit a perceptron on it, and plot the result.
toy_data_path = '../../Data/toy_data.csv'
toy_features, toy_labels = utils.load_toy_data(toy_data_path)

# T=5 is presumably the number of passes over the data -- TODO confirm
# against lab2.perceptron.
theta, theta_0 = lab2.perceptron(toy_features, toy_labels, T=5)

utils.plot_toy_results(toy_features, toy_labels, theta, theta_0)