Example #1
def train_add_k_param(bag, dev, alphas, dev_labels):
    '''Sweep add-k smoothing values on the dev set; return the dev accuracy for each alpha and the best alpha.'''
    dev_accuracy = []
    for alpha in alphas:
        bag.modify_alpha(alpha)
        pred = bag.batch_classify(dev)
        dev_accuracy.append((accuracy(dev_labels, pred), alpha))

    # print(dev_accuracy)

    return [x[0] for x in dev_accuracy], max(dev_accuracy)[1]
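
Example #1 only sweeps the smoothing value; the smoothing itself happens inside the BagOfWords model, which is not shown. Below is a minimal sketch of what an add-k smoothed class-conditional word probability typically looks like; the function name, counts, and vocabulary size are illustrative assumptions, not part of the example:

import math

def addk_log_prob(word_count, class_total, vocab_size, alpha):
    # Add-k smoothing (Laplace when alpha = 1): every vocabulary word gets a
    # pseudo-count of alpha, so unseen words keep a small non-zero probability.
    return math.log((word_count + alpha) / (class_total + alpha * vocab_size))

# e.g. a word seen 3 times among 1000 class tokens with a 5000-word vocabulary:
# addk_log_prob(3, 1000, 5000, alpha=0.1)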
Example #2
def test_classifier(dataset, model):
    '''
    Test the trained naive Bayes classifier.

    Input:
        dataset: testing data
        model: a tuple of the trained prior and conditional probabilities
    Output:
        None, but prints the accuracy of the model on the test data
    '''
    prior, conditional = model

    tokens = dataset.tokens()
    testset = dataset.getTestSentences()
    nTest = len(testset)

    pred = np.zeros((nTest, ), dtype=np.int32)  # prediction
    testLabels = np.zeros((nTest, ), dtype=np.int32)  # true label
    for i in range(nTest):
        words, testLabels[i] = testset[i]
        pred[i] = naiveBayesClassifier(tokens, words, prior, conditional)

    print("Test accuracy (%%): %f" % accuracy(testLabels, pred))
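
naiveBayesClassifier is called in Example #2 but not defined there. A minimal sketch of how such a classifier is commonly implemented, assuming tokens maps each word to a vocabulary index, prior is a vector of class priors, and conditional is a (vocabulary x classes) matrix of smoothed word probabilities; these shapes and the function name are assumptions, not the example's own code:

import numpy as np

def naive_bayes_classify(tokens, words, prior, conditional):
    # Score each class with log P(c) + sum of log P(w | c) over the sentence,
    # skipping out-of-vocabulary words, then return the argmax class.
    scores = np.log(prior).copy()
    for w in words:
        if w in tokens:
            scores += np.log(conditional[tokens[w], :])
    return int(np.argmax(scores))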
Example #3
# Try our regularization parameters
results = []
for regularization in REGULARIZATION:
    random.seed(3141)
    np.random.seed(59265)
    weights = np.random.randn(dimVectors, 5)
    print("Training for reg=%f" % regularization)

    # We will do batch optimization
    weights = sgd(lambda weights: softmax_wrapper(trainFeatures, trainLabels, 
        weights, regularization), weights, 3.0, 10000, PRINT_EVERY=100)

    # Test on train set
    _, _, pred = softmaxRegression(trainFeatures, trainLabels, weights)
    trainAccuracy = accuracy(trainLabels, pred)
    print("Train accuracy (%%): %f" % trainAccuracy)

    # Test on dev set
    _, _, pred = softmaxRegression(devFeatures, devLabels, weights)
    devAccuracy = accuracy(devLabels, pred)
    print("Dev accuracy (%%): %f" % devAccuracy)

    # Save the results and weights
    results.append({
        "reg" : regularization, 
        "weights" : weights, 
        "train" : trainAccuracy, 
        "dev" : devAccuracy})

# Print the accuracies
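
Example #3 stops at the comment above; a minimal sketch of how the collected results might be reported and the best regularization picked by dev accuracy (only the dictionary keys stored in the loop are taken from the example; the report format is an assumption):

# Report every run, then keep the setting with the highest dev accuracy.
for result in sorted(results, key=lambda r: r["reg"]):
    print("reg=%E  train=%f  dev=%f" % (result["reg"], result["train"], result["dev"]))

best = max(results, key=lambda r: r["dev"])
print("Best regularization: %E (dev accuracy %f)" % (best["reg"], best["dev"]))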
Example #4
        testFeatures[i, wordToken] = tokenCount[wordToken]
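
Only this single line of the feature-construction loop survives in Example #4. A minimal sketch of how such a count-based bag-of-words matrix might be built, assuming the same dataset API as Example #2 (dataset.tokens(), dataset.getTestSentences()); the use of collections.Counter is an assumption, not taken from the example:

import numpy as np
from collections import Counter

tokens = dataset.tokens()                # word -> vocabulary index
testset = dataset.getTestSentences()     # list of (words, label) pairs
nWords = len(tokens)

testFeatures = np.zeros((len(testset), nWords))
testLabels = np.zeros((len(testset),), dtype=np.int32)
for i, (words, label) in enumerate(testset):
    testLabels[i] = label
    # Count how often each vocabulary index occurs in the sentence.
    tokenCount = Counter(tokens[w] for w in words if w in tokens)
    for wordToken in tokenCount:
        testFeatures[i, wordToken] = tokenCount[wordToken]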

# Try our regularization parameters
results = []

# 1. Multinomial Naive Bayes + Bag of Words

from sklearn.naive_bayes import MultinomialNB

clf = MultinomialNB()
clf.fit(trainFeatures, trainLabels)

# Test on train set
pred = clf.predict(trainFeatures)
trainAccuracy = accuracy(trainLabels, pred)
print("Train accuracy (%%): %f" % trainAccuracy)

# Test on dev set
pred = clf.predict(devFeatures)
devAccuracy = accuracy(devLabels, pred)
print("Dev accuracy (%%): %f" % devAccuracy)

# Test on test set
pred = clf.predict(testFeatures)
testAccuracy = accuracy(testLabels, pred)
print("Test accuracy (%%): %f" % testAccuracy)

# Save the results and weights
results.append({
    "method": "Multinomial Naive Bayes + Bag of Words",
    "train": trainAccuracy,
    "dev": devAccuracy,
    "test": testAccuracy})
Example #5
# Try our regularization parameters
results = []
for regularization in REGULARIZATION:
    random.seed(3141)
    np.random.seed(59265)
    weights = np.random.randn(dimVectors, 5)
    print("Training for reg=%f" % regularization)

    # We will do batch optimization
    weights = sgd(lambda weights: softmax_wrapper(trainFeatures, trainLabels,
                                                  weights, regularization),
                  weights,
                  3.0,
                  1000,
                  PRINT_EVERY=1000)

    # Test on train set
    _, _, pred = softmaxRegression(trainFeatures, trainLabels, weights)
    trainAccuracy = accuracy(trainLabels, pred)
    print("Train accuracy (%%): %f" % trainAccuracy)

    # Test on dev set
    _, _, pred = softmaxRegression(devFeatures, devLabels, weights)
    devAccuracy = accuracy(devLabels, pred)
    print("Dev accuracy (%%): %f" % devAccuracy)

    # Save the results and weights
    results.append({
        "reg": regularization,
        "weights": weights,
        "train": trainAccuracy,
        "dev": devAccuracy
    })
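
Examples #3 and #5 both rely on sgd and softmax_wrapper without defining them. The loop above only needs the following contract: the wrapper returns a (cost, gradient) pair for the current weights, with an L2 penalty scaled by the regularization constant, and sgd repeatedly steps the weights against that gradient. The bodies below are illustrative assumptions, not the examples' own implementation:

import numpy as np

def l2_regularized(cost_and_grad, weights, reg):
    # Add an L2 penalty (reg * ||W||^2 / 2) to a base (cost, gradient) function.
    cost, grad = cost_and_grad(weights)
    cost += 0.5 * reg * np.sum(weights ** 2)
    grad += reg * weights
    return cost, grad

def simple_sgd(f, x0, step, iterations, PRINT_EVERY=100):
    # Plain gradient descent: f(x) must return (cost, gradient) at x.
    x = x0
    for it in range(1, iterations + 1):
        cost, grad = f(x)
        x = x - step * grad
        if it % PRINT_EVERY == 0:
            print("iter %d: cost %f" % (it, cost))
    return x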
Example #6
        trainBags = BagOfWords()
        trainBags.bag_construction(trainset, negation, boolean)
        dev_acc, best_alpha = train_add_k_param(trainBags, devset, Alpha,
                                                devLabels)
        print("Best alpha value: %f" % best_alpha)
        trainBags.modify_alpha(best_alpha)

        # test data
        testset = dataset.getTestSentences()
        nTest = len(testset)
        testLabels = np.zeros((nTest, ), dtype=np.int32)
        for i in range(nTest):
            words, testLabels[i] = testset[i]

        test_pred = trainBags.batch_classify(testset)
        test_acc = accuracy(testLabels, test_pred)

        all_test_acc.append(test_acc)
        all_dev_acc.append(dev_acc)

    for test_acc in all_test_acc:
        print("Test accuracy (%%): %f" % test_acc)

    for dev_acc in all_dev_acc:
        plt.plot(Alpha, dev_acc)
    plt.title("Accuracy on dev set")
    plt.xscale('log')
    plt.xlabel("alpha")
    plt.ylabel("accuracy")
    plt.legend(
        ['neg=F, boo=F', 'neg=T, boo=F', 'neg=F, boo=T', 'neg=T, boo=T'],