def testEquations(classifier, labeled_data):
    """Count how many labeled word problems the classifier labels correctly.

    Args:
        classifier: Object exposing ``classify(features)``; its result is
            compared with each record's label using ``==``.
        labeled_data: Iterable of 5-tuples
            ``(label, index, wordproblem, equationTemplate, solution)``.
            Only the label and the word problem are used here.

    Returns:
        A ``(correctly_found, incorrectly_found)`` tuple of counts; the two
        values add up to the number of records in ``labeled_data``.
    """
    correctly_found = 0
    # Both tallies start at zero; starting the miss count at 1 reported one
    # phantom error and returned (0, 1) for empty input.
    incorrectly_found = 0
    for (i, _iIndex, wordproblem, _equationTemplate, _solution) in labeled_data:
        foundIndex = classifier.classify(extractFeatures(wordproblem))
        if foundIndex == i:
            correctly_found += 1
        else:
            incorrectly_found += 1
    return (correctly_found, incorrectly_found)
def trainClassifier(labeled_word_problems, algorithm):
    """Train an nltk classifier that maps word-problem features to a label.

    Args:
        labeled_word_problems: Iterable of 5-tuples
            ``(label, index, wordproblem, equationTemplate, lSolutions)``.
            Only the label and the word problem are used; features come from
            ``extractFeatures(wordproblem)``.
        algorithm: One of ``'DecisionTree'``, ``'NaiveBayes'``,
            ``'MaxEntMegam'`` or ``'MaxEnt'``.

    Returns:
        The classifier returned by the selected nltk ``train`` call.

    Raises:
        ValueError: If ``algorithm`` is not one of the supported names.
    """
    supported = ('DecisionTree', 'NaiveBayes', 'MaxEntMegam', 'MaxEnt')
    # Reject unknown names before any feature extraction; previously this
    # surfaced as an UnboundLocalError at the final return.
    if algorithm not in supported:
        raise ValueError(
            'Unknown algorithm %r; expected one of: %s'
            % (algorithm, ', '.join(supported))
        )

    # The whole labeled set is used for training (no held-out split here).
    train_set = [
        (extractFeatures(wordproblem), i)
        for (i, _iIndex, wordproblem, _equationTemplate, _lSolutions)
        in labeled_word_problems
    ]

    if algorithm == 'DecisionTree':
        classifier = nltk.DecisionTreeClassifier.train(train_set)
    elif algorithm == 'NaiveBayes':
        classifier = nltk.NaiveBayesClassifier.train(train_set)
    elif algorithm == 'MaxEntMegam':
        classifier = nltk.classify.MaxentClassifier.train(train_set, 'MEGAM', trace=0, max_iter=1)
    else:  # 'MaxEnt' -- the only remaining validated name
        classifier = nltk.MaxentClassifier.train(train_set)

    return classifier
# Example #3
# 0
from FeatureExtractor import extractFeatures

# Call extractFeatures with two './'-relative file names.
# NOTE(review): the functions earlier in this file call extractFeatures with a
# single word-problem argument, while this call passes two path-like strings --
# presumably this snippet targets a different FeatureExtractor module; confirm.
extractFeatures("./1_fulldoc.txt", "./1_fulldocwithoutstemming.txt")