def testAlignments(test_data, classifiers):
    """Evaluate per-template alignment classifiers on test_data and print a report.

    Each row of ``test_data`` is unpacked as
    ``(i, iIndex, wordproblem, equationTemplate, solution)``. The classifier
    for a row is looked up in ``classifiers`` under ``str(equationTemplate)``;
    rows whose template has no classifier (missing key, or a ``None`` entry
    such as the one ``trainAlignmentClassifierScikit`` returns when it had
    nothing to train on) are skipped.

    For every evaluated row the function prints the expected alignment string
    next to the predicted one, and finally prints ``'Correct: <n>'``.

    Returns nothing; the result is only printed.
    """
    x0 = Symbol('x0')

    # These three lists are filled in lockstep so that a prediction is always
    # paired with the row it was made for, even when earlier rows were skipped.
    evaluatedRows = []
    goldAlignments = []
    prediction = []
    for (i, iIndex, wordproblem, equationTemplate, solution) in test_data:
        try:
            classifier = classifiers[str(equationTemplate)]
        except KeyError:
            continue
        if classifier is None:
            continue
        alignmentFeatures, correctlyAlignedIndices = extractAlignmentFeatures(
            wordproblem, equationTemplate, solution, i)
        # Predict with the classifier belonging to this row's own template;
        # predict() is given a one-row batch and the single label is taken.
        prediction.append(classifier.predict([alignmentFeatures])[0])
        goldAlignments.append(''.join(str(c) for c in correctlyAlignedIndices))
        evaluatedRows.append((wordproblem, equationTemplate, solution))

    # A predicted label is read one character at a time, each character being
    # one integer index into the problem's number vector.
    predictedAlignedIndices = [[int(char) for char in each] for each in prediction]

    correct = 0
    for row, alignedIndices in zip(evaluatedRows, predictedAlignedIndices):
        wordproblem, equationTemplate, solution = row
        try:
            numberVector = extractNumberVectorFromQuestion(wordproblem)
            alignedValues = [numberVector[index] for index in alignedIndices]
            result = solveEquations(equationTemplate,
                                    getNumberslots(equationTemplate),
                                    alignedValues)
            if solution[0] == result[x0]:
                correct += 1
        except Exception:
            # Best effort: a row that cannot be solved with the predicted
            # alignment simply does not score. Catching Exception (not a bare
            # except) keeps KeyboardInterrupt / SystemExit working.
            continue

    # NOTE(review): `correct` is incremented both for a matching solution
    # (above) and for a matching alignment string (below), so the printed
    # total combines the two counts -- confirm this is the intended metric.
    for gold, predicted in zip(goldAlignments, prediction):
        print('%s %s' % (gold, predicted))
        if gold == predicted:
            correct += 1
    print('Correct: ' + str(correct))
def trainAlignmentClassifierScikit(labeled_word_problems, algorithm):
    """Train a scikit-learn style classifier that predicts alignment strings.

    Each row of ``labeled_word_problems`` is unpacked as
    ``(i, iIndex, wordproblem, equationTemplate, solution)``; ``iIndex`` is
    printed as a progress trace. Features and the correct alignment indices
    come from ``extractAlignmentFeatures(..., i, 'train')``. Rows for which
    either of the two is empty are not used for training.

    The training label of a row is the concatenation of ``str()`` of each of
    its correct alignment indices.

    ``algorithm`` selects the estimator: ``'SVM'`` -> ``SVC``,
    ``'NaiveBayes'`` -> ``GaussianNB``, ``'DecisionTree'`` ->
    ``DecisionTreeClassifier``, ``'MaxEnt'`` -> ``LogisticRegression``.

    Returns the fitted classifier, or ``None`` when no row was usable.

    Raises ``ValueError`` if there is training data but ``algorithm`` is not
    one of the names above.
    """
    featuresets = []
    correctlyAlignedIndicesList = []
    for (i, iIndex, wordproblem, equationTemplate, solution) in labeled_word_problems:
        print(iIndex)
        alignmentFeatures, correctlyAlignedIndices = extractAlignmentFeatures(
            wordproblem, equationTemplate, solution, i, 'train')
        # Explicit len() checks instead of truthiness, since the helper's
        # return types are not visible here (truthiness is ambiguous for
        # array-like values).
        if len(correctlyAlignedIndices) == 0 or len(alignmentFeatures) == 0:
            continue
        featuresets.append(alignmentFeatures)
        correctlyAlignedIndicesList.append(
            ''.join(str(index) for index in correctlyAlignedIndices))

    if len(featuresets) == 0:
        return None

    if algorithm == 'SVM':
        classifier = SVC()
    elif algorithm == 'NaiveBayes':
        classifier = GaussianNB()
    elif algorithm == 'DecisionTree':
        classifier = DecisionTreeClassifier()
    elif algorithm == 'MaxEnt':
        classifier = LogisticRegression()
    else:
        # Fail with a clear message instead of calling .fit on None.
        raise ValueError('Unknown algorithm: %r' % (algorithm,))

    classifier.fit(featuresets, correctlyAlignedIndicesList)
    return classifier
def trainAlignmentClassifier(labeled_word_problems, algorithm):
    """Train an NLTK classifier on (alignment features, aligned indices) pairs.

    Each row of ``labeled_word_problems`` is unpacked as
    ``(i, iIndex, wordproblem, equationTemplate, solution)``. Features and
    the correct alignment indices come from ``extractAlignmentFeatures``;
    rows with no correct alignment indices are left out. The collected
    training set is printed before training.

    ``algorithm`` selects the trainer: ``'DecisionTree'``, ``'NaiveBayes'``,
    ``'MaxEntMegam'`` (Maxent trained with ``'MEGAM'``, ``trace=0``,
    ``max_iter=1``) or ``'MaxEnt'``.

    Returns the trained classifier.

    Raises ``ValueError`` if ``algorithm`` is not one of the names above.
    """
    train_set = []
    for (i, iIndex, wordproblem, equationTemplate, solution) in labeled_word_problems:
        alignmentFeatures, correctlyAlignedIndices = extractAlignmentFeatures(
            wordproblem, equationTemplate, solution, i)
        if len(correctlyAlignedIndices) != 0:
            # NOTE(review): the aligned indices are used directly as the
            # label; presumably they must be hashable for the NLTK trainers
            # -- confirm what extractAlignmentFeatures returns.
            train_set.append((alignmentFeatures, correctlyAlignedIndices))
    print(train_set)

    if algorithm == 'DecisionTree':
        classifier = nltk.DecisionTreeClassifier.train(train_set)
    elif algorithm == 'NaiveBayes':
        classifier = nltk.NaiveBayesClassifier.train(train_set)
    elif algorithm == 'MaxEntMegam':
        classifier = nltk.classify.MaxentClassifier.train(
            train_set, 'MEGAM', trace=0, max_iter=1)
    elif algorithm == 'MaxEnt':
        classifier = nltk.MaxentClassifier.train(train_set)
    else:
        # Without this branch `classifier` would be unassigned at the return.
        raise ValueError('Unknown algorithm: %r' % (algorithm,))
    return classifier