def train_add_k_param(bag, dev, alphas, dev_labels):
    """Sweep add-k smoothing values on the dev set and pick the best one.

    For each candidate alpha, sets it on *bag*, classifies the dev data,
    and records (accuracy, alpha) pairs.  Note the bag is left configured
    with the LAST alpha tried, not the best one — callers re-apply the
    returned best alpha themselves.

    Args:
        bag: classifier exposing modify_alpha() and batch_classify().
        dev: dev-set examples accepted by bag.batch_classify().
        alphas: iterable of candidate smoothing values.
        dev_labels: gold labels for *dev*, as expected by accuracy().

    Returns:
        (accuracies, best_alpha): dev accuracy per candidate (in input
        order) and the alpha achieving the highest accuracy (ties broken
        toward the larger alpha, via tuple comparison).
    """
    scored = []
    for candidate in alphas:
        bag.modify_alpha(candidate)
        predictions = bag.batch_classify(dev)
        scored.append((accuracy(dev_labels, predictions), candidate))
    _, best_alpha = max(scored)
    return [acc for acc, _ in scored], best_alpha
def test_classifier(dataset, model):
    """Evaluate a trained naive Bayes classifier on the test split.

    Args:
        dataset: corpus object exposing tokens() and getTestSentences();
            each test sentence is a (words, label) pair.
        model: tuple (prior, conditional) — the trained probability tables.

    Returns:
        None; prints the model's accuracy on the test sentences.
    """
    prior, conditional = model
    vocab = dataset.tokens()
    sentences = dataset.getTestSentences()
    n_test = len(sentences)
    predictions = np.zeros((n_test, ), dtype=np.int32)   # predicted labels
    gold = np.zeros((n_test, ), dtype=np.int32)          # true labels
    for idx, (words, label) in enumerate(sentences):
        gold[idx] = label
        predictions[idx] = naiveBayesClassifier(vocab, words, prior, conditional)
    print("Test accuracy (%%): %f" % accuracy(gold, predictions))
# Try our regularization parameters
# FIX: the original used Python 2 `print` statements, which are syntax
# errors under Python 3 and inconsistent with the print() calls used in
# the rest of this file; converted to print() (output is unchanged).
results = []
for regularization in REGULARIZATION:
    # Re-seed before each setting so every regularization value starts
    # from the same random initial weights (fair comparison).
    random.seed(3141)
    np.random.seed(59265)
    weights = np.random.randn(dimVectors, 5)
    print("Training for reg=%f" % regularization)

    # We will do batch optimization
    weights = sgd(
        lambda weights: softmax_wrapper(trainFeatures, trainLabels,
                                        weights, regularization),
        weights, 3.0, 10000, PRINT_EVERY=100)

    # Test on train set
    _, _, pred = softmaxRegression(trainFeatures, trainLabels, weights)
    trainAccuracy = accuracy(trainLabels, pred)
    print("Train accuracy (%%): %f" % trainAccuracy)

    # Test on dev set
    _, _, pred = softmaxRegression(devFeatures, devLabels, weights)
    devAccuracy = accuracy(devLabels, pred)
    print("Dev accuracy (%%): %f" % devAccuracy)

    # Save the results and weights
    results.append({
        "reg": regularization,
        "weights": weights,
        "train": trainAccuracy,
        "dev": devAccuracy})

# Print the accuracies
# Trains a sklearn MultinomialNB classifier on bag-of-words count features and
# prints accuracy on the train, dev and test splits, appending the scores to
# `results` (presumably one entry per method being compared — TODO confirm).
# NOTE(review): this chunk is truncated — the trailing results.append({...})
# dict literal is cut off mid-expression, and the code was collapsed onto a
# single physical line, losing all indentation. Restore the original line
# structure and the rest of the append call before running.
testFeatures[i, wordToken] = tokenCount[wordToken] # Try our regularization parameters results = [] # 1. Multinomial Naive Bayes + Bag of Words # Test on train set from sklearn.naive_bayes import MultinomialNB clf = MultinomialNB() clf.fit(trainFeatures, trainLabels) # Test on train set pred = clf.predict(trainFeatures) trainAccuracy = accuracy(trainLabels, pred) print("Train accuracy (%%): %f" % trainAccuracy) # Test on dev set pred = clf.predict(devFeatures) devAccuracy = accuracy(devLabels, pred) print("Dev accuracy (%%): %f" % devAccuracy) # Test on test set pred = clf.predict(testFeatures) testAccuracy = accuracy(testLabels, pred) print("Test accuracy (%%): %f" % testAccuracy) # Save the results and weights results.append({ "method": "Multinomial Naive Bayes + Bag of Words",
# Seeds the RNGs, trains a softmax regression classifier via SGD for one
# `regularization` value, then records train/dev accuracy in `results`.
# NOTE(review): `regularization` is referenced but never bound here — this
# chunk appears to be the BODY of a `for regularization in REGULARIZATION:`
# loop whose header lies outside this view (cf. the identical Python 2
# variant elsewhere in this file). Indentation was lost when the source was
# collapsed onto one line; re-indent this code under its loop header before
# running. Also note it uses 1000 SGD iterations here vs 10000 in the py2
# variant — presumably intentional, TODO confirm.
random.seed(3141) np.random.seed(59265) weights = np.random.randn(dimVectors, 5) print("Training for reg=%f" % regularization) # We will do batch optimization weights = sgd(lambda weights: softmax_wrapper(trainFeatures, trainLabels, weights, regularization), weights, 3.0, 1000, PRINT_EVERY=1000) # Test on train set _, _, pred = softmaxRegression(trainFeatures, trainLabels, weights) trainAccuracy = accuracy(trainLabels, pred) print("Train accuracy (%%): %f" % trainAccuracy) # Test on dev set _, _, pred = softmaxRegression(devFeatures, devLabels, weights) devAccuracy = accuracy(devLabels, pred) print("Dev accuracy (%%): %f" % devAccuracy) # Save the results and weights results.append({ "reg": regularization, "weights": weights, "train": trainAccuracy, "dev": devAccuracy })
# Builds a BagOfWords model for one (negation, boolean) feature configuration,
# tunes the add-k smoothing alpha on the dev set via train_add_k_param, applies
# the best alpha, evaluates on the test split, and accumulates dev/test
# accuracies across configurations; afterwards prints all test accuracies and
# plots dev accuracy vs alpha on a log-x axis. The legend labels suggest this
# runs for four negation/boolean combinations — presumably inside an outer
# loop not visible here, TODO confirm.
# NOTE(review): chunk is truncated — the final plt.legend([...] call is cut
# off mid-argument-list — and the code was collapsed onto one physical line,
# losing indentation. Also note batch_classify is given `testset` (a list of
# (words, label) pairs), matching how `dev` is passed in train_add_k_param.
trainBags = BagOfWords() trainBags.bag_construction(trainset, negation, boolean) dev_acc, best_alpha = train_add_k_param(trainBags, devset, Alpha, devLabels) print("Best alpha value: %f" % best_alpha) trainBags.modify_alpha(best_alpha) # test data testset = dataset.getTestSentences() nTest = len(testset) testLabels = np.zeros((nTest, ), dtype=np.int32) for i in range(nTest): words, testLabels[i] = testset[i] test_pred = trainBags.batch_classify(testset) test_acc = accuracy(testLabels, test_pred) all_test_acc.append(test_acc) all_dev_acc.append(dev_acc) for test_acc in all_test_acc: print("Test accuracy (%%): %f" % test_acc) for dev_acc in all_dev_acc: plt.plot(Alpha, dev_acc) plt.title("Accuracy on dev set") plt.xscale('log') plt.xlabel("alpha") plt.ylabel("accuracy") plt.legend( ['neg=F, boo=F', 'neg=T, boo=F', 'neg=F, boo=T', 'neg=T, boo=T'],