def test():
    """Exercise the naive Bayes classifier over every supported configuration.

    Loads the contest agent's train and test sets, then for each valid
    (smoothing, logTransform) combination trains a classifier, reports its
    accuracy on both sets, and compares the slice ``posteriors[20:25]``
    against stored reference output via ``staticOutputCheck``.  Passing is a
    very good (though not perfect) indicator that the implementation is
    correct.
    """
    train_path = os.path.join('classifier_data', 'contest_training.tsv')
    test_path = os.path.join('classifier_data', 'contest_test.tsv')

    # Without smoothing only the non-log variant is checked; with smoothing
    # both log and plain probabilities are exercised.
    smoothing_values = [0, 1]
    logtransform_values = {0: [True, False], 1: [True]}

    train_data, train_labels, train_features, = loadDataset(train_path)
    test_data, test_labels, test_features = loadDataset(test_path)

    # The label universe spans both splits.
    all_labels = set(train_labels) | set(test_labels)

    for smooth in smoothing_values:
        for log_xform in logtransform_values[smooth]:
            kwargs = {'smoothing': smooth, 'logTransform': log_xform}
            kwargs['legalLabels'] = all_labels
            if smooth:
                # Laplace smoothing needs the full per-feature value sets.
                kwargs['featureValues'] = mergeFeatureValues(
                    train_features, test_features)

            # train on train set
            model = NaiveBayesClassifier(**kwargs)
            model.fit(train_data, train_labels)

            # evaluate on train set
            predicted_train = model.predict(train_data)
            evaluateClassifier(predicted_train, train_labels, 'train', model.k)
            staticOutputCheck(train_path, smooth, log_xform,
                              model.posteriors[20:25])

            # evaluate on test set
            predicted_test = model.predict(test_data)
            evaluateClassifier(predicted_test, test_labels, 'test', model.k)
            staticOutputCheck(test_path, smooth, log_xform,
                              model.posteriors[20:25])
def runClassifier(train_path, test_path, smoothing, logtransform):
    """Train a naive Bayes classifier and report accuracy on both splits.

    Builds the classifier for a single (smoothing, logtransform)
    configuration, fits it on the training set, then evaluates the predicted
    labels on the training and test sets via ``evaluateClassifier``.
    """
    kwargs = {'smoothing': smoothing, 'logTransform': logtransform}

    train_data, train_labels, train_features, = loadDataset(train_path)
    test_data, test_labels, test_features = loadDataset(test_path)

    # Legal labels are whatever appears in either split.
    kwargs['legalLabels'] = set(train_labels) | set(test_labels)

    if smoothing:
        # Smoothing needs to know every value each feature can take.
        kwargs['featureValues'] = mergeFeatureValues(
            train_features, test_features)

    # train the actual model
    model = NaiveBayesClassifier(**kwargs)
    model.fit(train_data, train_labels)

    evaluateClassifier(model.predict(train_data), train_labels,
                       'train', model.k)
    evaluateClassifier(model.predict(test_data), test_labels,
                       'test', model.k)