Exemple #1
0
def test():
    """
    Smoke-test the naive Bayes classifier on the contest data files.

    For each (smoothing, logTransform) pair listed below, a fresh
    NaiveBayesClassifier is fitted on the training file and then asked to
    predict both the training file and the test file. After every prediction
    the result is scored with evaluateClassifier, and the slice
    posteriors[20:25] of the classifier is handed to staticOutputCheck
    together with the file path and the two settings.

    Passing is a strong, though not conclusive, sign that the implementation
    is correct. Nothing is returned.
    """
    train_path = os.path.join('classifier_data', 'contest_training.tsv')
    test_path = os.path.join('classifier_data', 'contest_test.tsv')

    # (smoothing, logTransform) pairs, in the order they are exercised.
    settings = ((0, True), (0, False), (1, True))

    trainData, trainLabels, trainFeatures = loadDataset(train_path)
    testData, testLabels, testFeatures = loadDataset(test_path)

    # Every label seen in either file; the same set is given to each classifier.
    all_labels = set(trainLabels) | set(testLabels)

    for smooth, use_log in settings:
        kwargs = {
            'smoothing': smooth,
            'logTransform': use_log,
            'legalLabels': all_labels,
        }
        if smooth:
            # Feature values are only supplied when smoothing is enabled.
            kwargs['featureValues'] = mergeFeatureValues(trainFeatures, testFeatures)

        model = NaiveBayesClassifier(**kwargs)
        model.fit(trainData, trainLabels)

        # Same three steps for both splits: predict, score, static check.
        for split, path, data, expected in (
            ('train', train_path, trainData, trainLabels),
            ('test', test_path, testData, testLabels),
        ):
            predicted = model.predict(data)
            evaluateClassifier(predicted, expected, split, model.k)
            # NOTE(review): posteriors is presumably refreshed by predict() - confirm.
            staticOutputCheck(path, smooth, use_log, model.posteriors[20:25])
Exemple #2
0
def test():
    """
    Run the naive Bayes classifier checks against the contest data set.

    The classifier is trained on the training file once per configuration
    (smoothing 0 with logTransform on and off, smoothing 1 with logTransform
    on). For both the training and the test file the predictions are passed
    to evaluateClassifier, and posteriors[20:25] is passed to
    staticOutputCheck along with the file path and the configuration.

    A pass is a very good, but not perfect, indicator of a correct
    implementation. The function returns nothing.
    """
    data_dir = 'classifier_data'
    train_path = os.path.join(data_dir, 'contest_training.tsv')
    test_path = os.path.join(data_dir, 'contest_test.tsv')

    # smoothing value -> logTransform flags to try with that value
    log_flags_by_smoothing = {
        0: [True, False],
        1: [True],
    }

    trainData, trainLabels, trainFeatures = loadDataset(train_path)
    testData, testLabels, testFeatures = loadDataset(test_path)

    legal = set(trainLabels) | set(testLabels)

    def check_split(model, split, path, data, expected, smooth, use_log):
        # Predict one split, score it, then run the static output check on it.
        predictions = model.predict(data)
        evaluateClassifier(predictions, expected, split, model.k)
        staticOutputCheck(path, smooth, use_log, model.posteriors[20:25])

    for smooth in sorted(log_flags_by_smoothing):
        for use_log in log_flags_by_smoothing[smooth]:
            params = {'smoothing': smooth, 'logTransform': use_log}
            params['legalLabels'] = legal
            if smooth:
                # The merged feature values are passed only when smoothing is on.
                params['featureValues'] = mergeFeatureValues(trainFeatures, testFeatures)

            # Always fit on the training data, whatever split is checked next.
            model = NaiveBayesClassifier(**params)
            model.fit(trainData, trainLabels)

            check_split(model, 'train', train_path, trainData, trainLabels, smooth, use_log)
            check_split(model, 'test', test_path, testData, testLabels, smooth, use_log)
Exemple #3
0
def runClassifier(train_path, test_path, smoothing, logtransform):
    """
    Fit a NaiveBayesClassifier on one data file and evaluate it on two.

    Args:
        train_path: path given to loadDataset for the training data.
        test_path: path given to loadDataset for the test data.
        smoothing: forwarded as the 'smoothing' argument; when truthy, the
            merged train/test feature values are forwarded as well.
        logtransform: forwarded as the 'logTransform' argument.

    The classifier is fitted on the training data only, then its predictions
    for the training and the test data are each passed to evaluateClassifier.
    Nothing is returned.
    """
    trainData, trainLabels, trainFeatures = loadDataset(train_path)
    testData, testLabels, testFeatures = loadDataset(test_path)

    options = {
        'smoothing': smoothing,
        'logTransform': logtransform,
        # Legal labels are the union of the labels found in both files.
        'legalLabels': set(trainLabels) | set(testLabels),
    }
    if smoothing:
        options['featureValues'] = mergeFeatureValues(trainFeatures, testFeatures)

    model = NaiveBayesClassifier(**options)
    model.fit(trainData, trainLabels)

    # Predict and score the training split first, then the test split.
    for split, data, expected in (
        ('train', trainData, trainLabels),
        ('test', testData, testLabels),
    ):
        predicted = model.predict(data)
        evaluateClassifier(predicted, expected, split, model.k)
Exemple #4
0
def runClassifier(train_path, test_path, smoothing, logtransform):
    """
    Train the naive Bayes classifier and score it on the train and test data.

    Both files are read with loadDataset. The classifier receives the
    smoothing and logTransform settings, the union of the labels from both
    files as its legal labels and, only when `smoothing` is truthy, the
    feature values merged from both files. It is fitted on the training data
    and then evaluated, via evaluateClassifier, first on the training data and
    then on the test data. There is no return value.
    """
    trainData, trainLabels, trainFeatures = loadDataset(train_path)
    testData, testLabels, testFeatures = loadDataset(test_path)

    kwargs = {'smoothing': smoothing, 'logTransform': logtransform}
    kwargs['legalLabels'] = set(trainLabels) | set(testLabels)
    if smoothing:
        merged = mergeFeatureValues(trainFeatures, testFeatures)
        kwargs['featureValues'] = merged

    nb = NaiveBayesClassifier(**kwargs)
    nb.fit(trainData, trainLabels)

    def score(split, data, expected):
        # Predict one split and hand the outcome to the evaluator.
        guesses = nb.predict(data)
        evaluateClassifier(guesses, expected, split, nb.k)

    score('train', trainData, trainLabels)
    score('test', testData, testLabels)