def runTrainError(dataFile, classFile, vocab, samples):
    """Train every classifier on the full data set and print its training score.

    The examples are loaded into an ``ExampleData`` container, each classifier
    registered in ``learner.clf`` is trained via ``learner.learn`` and then
    scored with ``learner.test`` on the very same data it was trained on.

    Args:
        dataFile: Passed to ``ExampleData.readDataVectorFile``.
        classFile: Passed to ``ExampleData.readClassificationFile``.
        vocab: Vocabulary size; converted with ``int()``.
        samples: Number of samples; converted with ``int()``.

    Raises:
        KeyError: If ``learner.clf`` has no entry named "NB", "LinearSVC",
            "SVM" or "SGD" (the summary line looks all four up by name).
    """
    # Single pre-formatted argument: prints the same text under the Python 2
    # print statement and the Python 3 print function.
    print("Samples: %s" % (samples,))
    print("Vocab size: %s" % (vocab,))

    data = ExampleData(int(samples), int(vocab))
    data.readDataVectorFile(dataFile)
    data.readClassificationFile(classFile)

    learner = Learner(data)

    # NOTE(review): the output labels these values "train acc"; confirm that
    # Learner.test returns an accuracy rather than an error rate.
    trainScore = {}
    # list(...) snapshots the names, as .keys() already did on Python 2, so
    # the loop is unaffected if learn() touches learner.clf.
    for name in list(learner.clf.keys()):
        learner.learn(name)
        trainScore[name] = learner.test(name, data.Xdata, data.Ydata)

    print("NB train acc:%f\tLinearSVC train acc:%f\tSVM train acc:%f\tSGD train acc:%f"
          % (trainScore["NB"], trainScore["LinearSVC"],
             trainScore["SVM"], trainScore["SGD"]))
def runML(dataFile, classFile, vocab, samples):
    """Score four classifiers over repeated trials and print the averages.

    For each of five trials, every classifier is evaluated with
    ``learner.test_kf`` (presumably k-fold cross-validation; confirm against
    ``Learner.test_kf``). The mean of the returned scores is printed per
    trial, and the per-classifier average over all trials is printed at the
    end.

    Args:
        dataFile: Passed to ``ExampleData.readDataVectorFile``.
        classFile: Passed to ``ExampleData.readClassificationFile``.
        vocab: Vocabulary size; converted with ``int()``.
        samples: Number of samples; converted with ``int()``.
    """
    # Single pre-formatted argument: prints the same text under the Python 2
    # print statement and the Python 3 print function.
    print("Samples: %s" % (samples,))
    print("Vocab size: %s" % (vocab,))

    data = ExampleData(int(samples), int(vocab))
    data.readDataVectorFile(dataFile)
    data.readClassificationFile(classFile)

    numTrials = 5
    # Evaluation (and per-trial print) order. Note that the final summary
    # line lists SVM before SGD, so the two orders intentionally differ.
    evalOrder = ("NB", "LinearSVC", "SGD", "SVM")
    totals = dict.fromkeys(evalOrder, 0)

    learner = Learner(None)
    for _ in range(numTrials):
        for name in evalOrder:
            score = learner.test_kf(name, data.Xdata, data.Ydata)
            trialMean = score.mean()
            totals[name] += trialMean
            print("%s: %s" % (name, trialMean))

    avgNB = totals["NB"] / numTrials
    avgLin = totals["LinearSVC"] / numTrials
    avgSVC = totals["SVM"] / numTrials
    avgSGD = totals["SGD"] / numTrials
    print("NB Avg:%f\tLinearSVC Avg:%f\tSVM Avg:%f\tSGD Avg:%f"
          % (avgNB, avgLin, avgSVC, avgSGD))