Ejemplo n.º 1
0
 def test_02_from_dataset_02(self):
     """Check that a classifier built from a given dataset starts with the expected weights.

     The classifier is constructed via ``PerceptronClassifier.from_dataset``
     from ``self.small_dataset_train_2``; its ``weights`` must equal a
     mapping of 'highly', 'boring', 'green' and 'eggs' to 0.
     """
     built = PerceptronClassifier.from_dataset(self.small_dataset_train_2)

     # Fail with an explicit message instead of an AttributeError on `.weights`.
     if built is None:
         failure_text = 'Constructing classifier for dataset failed: from_dataset returned None'
         self.fail(msg=failure_text)

     zero_weights = dict.fromkeys(('highly', 'boring', 'green', 'eggs'), 0)
     self.assertEqual(built.weights, zero_weights)
Ejemplo n.º 2
0
def nltk_movie_review_accuracy(num_iterations):
    """Pick the best feature count for a perceptron and score it on the test set.

    For each candidate number of features (100, 1000, 10000) a
    ``PerceptronClassifier`` is built from the training documents and its
    accuracy is measured on the training and development sets. The
    classifier with the highest development accuracy is kept, its top
    features are printed, and it is finally evaluated on the test set.

    NOTE: this is still an exercise template. The function returns
    ``(0, 0)`` immediately and the training step is a placeholder, so
    everything after the first ``return`` is currently unreachable and
    ``num_iterations`` is not used yet.

    Args:
        num_iterations: Number of training iterations; presumably meant for
            the training step that is still marked TODO.

    Returns:
        A tuple ``(best_development_accuracy, testing_accuracy)``.
        Currently always ``(0, 0)`` because of the early return.
    """
    return 0, 0  # TODO: Exercise 4: remove line
    (training_documents, dev_documents, test_documents) = load_reviews()

    best_development_accuracy = 0.0
    best_train_accuracy = 0.0
    best_num_features = 0
    best_classifier = None
    best_feature_set = None

    # Test different numbers of features.
    for n in [100, 1000, 10000]:
        print("Training with %d features..." % n)
        # Training set
        training_set = Dataset.from_document_collection(training_documents,
                                                        num_features=n)
        # Development set: reuse the training feature set so both datasets
        # are described by the same features.
        development_set = Dataset.from_document_collection(
            dev_documents, feature_set=training_set.feature_set)

        # Train classifier
        classifier = PerceptronClassifier.from_dataset(training_set)
        pass  # TODO: Exercise 4: train the classifier

        # Accuracies of classifier with n features
        train_accuracy = classifier.test_accuracy(training_set)
        development_accuracy = classifier.test_accuracy(development_set)

        # Always accept the first candidate so that a best classifier exists
        # even when every development accuracy is 0.0.
        is_better = (best_classifier is None
                     or development_accuracy > best_development_accuracy)
        if is_better:
            best_development_accuracy = development_accuracy
            best_train_accuracy = train_accuracy
            best_num_features = n
            best_classifier = classifier.copy()
            best_feature_set = training_set.feature_set

    # Report the values that belong to the best classifier, not the ones
    # left over from the last loop iteration.
    print(
        "Best classifier with %d features: \t Train Accuracy: %.4f \t Dev Accuracy: %.4f"
        % (best_num_features, best_train_accuracy, best_development_accuracy))
    print("Best number of features: %d " % best_num_features)
    print("Top features for positive class:")
    print(best_classifier.features_for_class(True))
    print("Top features for negative class:")
    print(best_classifier.features_for_class(False))

    # Compute test score for best setting.
    testing_set = Dataset.from_document_collection(
        test_documents, feature_set=best_feature_set)
    testing_accuracy = best_classifier.test_accuracy(testing_set)
    print("Test score for best setting: %.4f" % testing_accuracy)
    return best_development_accuracy, testing_accuracy