def testEquations(classifier, labeled_data): correctly_found = 0 incorrectly_found = 1 for (i, iIndex, wordproblem, equationTemplate, solution) in labeled_data: foundIndex = classifier.classify(extractFeatures(wordproblem)) #print foundIndex, i if foundIndex == i: correctly_found += 1 else: incorrectly_found += 1 return (correctly_found, incorrectly_found)
def trainClassifier(labeled_word_problems, algorithm):
    """Train an NLTK classifier on labeled word problems.

    Args:
        labeled_word_problems: Iterable of 5-tuples
            ``(i, iIndex, wordproblem, equationTemplate, lSolutions)``.
            Only ``i`` (used as the training label) and ``wordproblem``
            (passed to ``extractFeatures``) are used.
        algorithm: One of ``'DecisionTree'``, ``'NaiveBayes'``,
            ``'MaxEntMegam'`` or ``'MaxEnt'``.

    Returns:
        The classifier object returned by the selected NLTK ``train`` call.

    Raises:
        ValueError: If ``algorithm`` is not one of the supported names.
    """
    supported = ('DecisionTree', 'NaiveBayes', 'MaxEntMegam', 'MaxEnt')
    # Validate before extracting features: an unknown name used to fall
    # through every branch and fail with an UnboundLocalError on return.
    if algorithm not in supported:
        raise ValueError(
            "Unknown algorithm %r; expected one of: %s"
            % (algorithm, ', '.join(supported)))

    # Every labeled example is used for training; no hold-out split is made.
    train_set = [
        (extractFeatures(wordproblem), i)
        for (i, iIndex, wordproblem, equationTemplate, lSolutions)
        in labeled_word_problems
    ]

    if algorithm == 'DecisionTree':
        return nltk.DecisionTreeClassifier.train(train_set)
    if algorithm == 'NaiveBayes':
        return nltk.NaiveBayesClassifier.train(train_set)
    if algorithm == 'MaxEntMegam':
        return nltk.classify.MaxentClassifier.train(
            train_set, 'MEGAM', trace=0, max_iter=1)
    # Only 'MaxEnt' remains after the validation above.
    return nltk.MaxentClassifier.train(train_set)
# Bring in the project's feature extractor from the FeatureExtractor module.
from FeatureExtractor import extractFeatures

# Top-level call: runs whenever this module is executed or imported, passing
# two relative text-file paths to extractFeatures.
# NOTE(review): elsewhere in this file extractFeatures is called with a single
# word-problem argument; confirm this two-path call matches the signature in
# FeatureExtractor and that running it at import time is intended.
extractFeatures("./1_fulldoc.txt", "./1_fulldocwithoutstemming.txt")