# NOTE: every print below passes a single parenthesised expression, so the
# output is identical under the Python 2 print statement and the Python 3
# print function.

# Grid search for the `svm` classifier created earlier in the script, then
# report its error on the held-out test data.
svm.grid_search_cv(n_jobs=4, verbose=5)
test_error = svm.get_test_error()
print("Test error in held out set: " + str(test_error))
print("=" * 20)

# Same SVM experiment with ngram_range=(1, 2)
# (presumably unigrams + bigrams -- confirm against SVMClassifier).
svm = SVMClassifier(
    train_reviews,
    train_labels,
    ngram_range=(1, 2),
    test_texts=test_reviews,
    test_labels=test_labels,
    compute_features=True,
)
svm.grid_search_cv(n_jobs=4, verbose=5)
test_error = svm.get_test_error()
print("Test error in held out set: " + str(test_error))
print("=" * 20)

# Simple bag of words with a logistic classifier
lr = LogisticClassifier(
    train_reviews,
    train_labels,
    test_texts=test_reviews,
    test_labels=test_labels,
    compute_features=True,
)
lr.grid_search_cv(verbose=5, n_jobs=4)
test_error = lr.get_test_error()
print("Test error in held out set: " + str(test_error))
print("=" * 20)

# Logistic classifier again, this time with ngram_range=(1, 2).
lr = LogisticClassifier(
    train_reviews,
    train_labels,
    ngram_range=(1, 2),
    test_texts=test_reviews,
    test_labels=test_labels,
    compute_features=True,
)
lr.grid_search_cv(verbose=5, n_jobs=4)
test_error = lr.get_test_error()
print("Test error in held out set: " + str(test_error))
print("=" * 20)

# SGD up to 3-grams
# NOTE: every print below passes a single parenthesised expression, so the
# output is identical under the Python 2 print statement and the Python 3
# print function.

# Report the test error computed by the code preceding this section.
print("Test error: " + str(test_error))

print("SGD Classifier")
sgd = SGDTextClassifier(
    train_reviews,
    train_labels,
    test_texts=test_reviews,
    test_labels=test_labels,
)
# Disabled: direct train/test error report for the SGD classifier.
#train_error = sgd.get_training_error()
#test_error = sgd.get_test_error()
#print "Training error: " + str(train_error)
#print "Test error: " + str(test_error)
sgd.set_bag_of_ngrams()
sgd.grid_search_cv(verbose=0, n_jobs=4)

print("Logistic classifier")
# NOTE(review): `sgd` is rebound to a LogisticClassifier here (and to an
# SVMClassifier below). The name is kept because code after this section may
# still refer to it.
sgd = LogisticClassifier()
sgd.set_training_data(train_reviews, train_labels)
sgd.set_test_data(test_reviews, test_labels)
sgd.set_bag_of_ngrams()

sgd.train()
train_error = sgd.get_training_error()
test_error = sgd.get_test_error()
print("Training error: " + str(train_error))
print("Test error: " + str(test_error))

print("SVM classifier")
sgd = SVMClassifier()
sgd.set_training_data(train_reviews, train_labels)
sgd.set_test_data(test_reviews, test_labels)
sgd.set_bag_of_ngrams()
# NOTE: every print below passes a single parenthesised expression, so the
# output is identical under the Python 2 print statement and the Python 3
# print function.

# SVM with ngram_range=(1, 2)
# (presumably unigrams + bigrams -- confirm against SVMClassifier):
# grid search, then report the error on the held-out test data.
svm = SVMClassifier(
    train_reviews,
    train_labels,
    ngram_range=(1, 2),
    test_texts=test_reviews,
    test_labels=test_labels,
    compute_features=True,
)
svm.grid_search_cv(n_jobs=4, verbose=5)
test_error = svm.get_test_error()
print("Test error in held out set: " + str(test_error))
print("=" * 20)

# Simple bag of words with a logistic classifier
lr = LogisticClassifier(
    train_reviews,
    train_labels,
    test_texts=test_reviews,
    test_labels=test_labels,
    compute_features=True,
)
lr.grid_search_cv(verbose=5, n_jobs=4)
test_error = lr.get_test_error()
print("Test error in held out set: " + str(test_error))
print("=" * 20)

# Logistic classifier again, this time with ngram_range=(1, 2). The resulting
# test error is stored in `test_error`; nothing in this section prints it.
lr = LogisticClassifier(
    train_reviews,
    train_labels,
    ngram_range=(1, 2),
    test_texts=test_reviews,
    test_labels=test_labels,
    compute_features=True,
)
lr.grid_search_cv(verbose=5, n_jobs=4)
test_error = lr.get_test_error()
# Build features for the `nb` classifier created earlier in the script
# (the bag of words can also be computed manually), then grid-search it.
nb.set_bag_of_ngrams()
nb.grid_search_cv(n_jobs=4)

# Repeat the grid searches with ngram_range=(1, 2)
# (presumably unigrams + bigrams -- confirm against the classifier classes).
# The three constructors receive exactly the same keyword arguments, so they
# are gathered once here.
bigram_options = {
    "ngram_range": (1, 2),
    "test_texts": test_reviews,
    "test_labels": test_labels,
    "compute_features": True,
}

sgd = SGDTextClassifier(train_reviews, train_labels, **bigram_options)
sgd.grid_search_cv(n_jobs=4, verbose=1)

nb = NaiveBayesClassifier(train_reviews, train_labels, **bigram_options)
nb.grid_search_cv(n_jobs=4, verbose=1)

lr = LogisticClassifier(train_reviews, train_labels, **bigram_options)
lr.grid_search_cv(verbose=5, n_jobs=4)

# Disabled: manual train/evaluate flow for Naive Bayes, kept for reference.
# print "Naive Bayes"
# nb = NaiveBayesClassifier()
# nb.set_training_data(train_reviews, train_labels)
# nb.set_test_data(test_reviews, test_labels)
# nb.set_bag_of_ngrams()
#
# nb.train()
# train_error = nb.get_training_error()
# test_error = nb.get_test_error()
# print "Training error: " + str(train_error)
# print "Test error: " + str(test_error)