def test_01_from_file_02(self):
    """Verify that a classifier round-trips through save()/from_file() with its weights intact."""
    path = 'data/ex05_from_file_test.model'
    # Persist a classifier with known weights, then reload it from disk.
    original = PerceptronClassifier({'good': 1, 'terrible': -1})
    original.save(path)
    loaded = PerceptronClassifier.from_file(path)
    if loaded is None:
        self.fail(
            msg=
            "Constructing classifier from file failed: from_file returned None"
        )
    # The reloaded classifier must carry exactly the weights that were saved.
    self.assertEqual(loaded.weights, {'good': 1, 'terrible': -1})
def test_02_for_dataset_02(self):
    """Verify that a classifier can be constructed with initial weights for a given dataset."""
    classifier = PerceptronClassifier.for_dataset(self.small_dataset_train_2)
    if classifier is None:
        self.fail(
            msg=
            'Constructing classifier for dataset failed: for_dataset returned None'
        )
    # Every feature in the dataset's vocabulary should start at weight 0.
    self.assertEqual(
        classifier.weights,
        {'highly': 0, 'boring': 0, 'green': 0, 'eggs': 0})
def test_04_update_02(self):
    """Verify that the perceptron update is performed correctly."""
    # Misclassified instance: the weight of the active feature is corrected.
    classifier = PerceptronClassifier({'highly': 1, 'boring': -1})
    classifier.update(self.small_instance_list_do_update[1])
    self.assertEqual(classifier.weights, {'highly': 1, 'boring': 0})
    # Correctly classified instance: update() must report that no change happened.
    classifier = PerceptronClassifier({'highly': 1, 'boring': -1})
    changed = classifier.update(self.small_instance_list_no_update[1])
    self.assertEqual(False, changed)
def test_04_update_01(self):
    """Verify that the perceptron update is performed correctly."""
    # Misclassified instance — test document: ("highly", "doc25", False).
    # The weight of the active feature must be decremented.
    classifier = PerceptronClassifier({'highly': 1, 'boring': -1})
    classifier.update(self.small_instance_list_do_update[0])
    self.assertEqual(classifier.weights, {'highly': 0, 'boring': -1})
    # Correctly classified instance — test document: ("boring", "doc26", True).
    # update() must report that no change happened.
    classifier = PerceptronClassifier({'highly': 1, 'boring': -1})
    changed = classifier.update(self.small_instance_list_no_update[0])
    self.assertEqual(False, changed)
def nltk_movie_review_accuracy(num_iterations):
    """Try different numbers of features and pick the setting with the best dev accuracy.

    Trains a perceptron classifier on the movie-review split for several
    vocabulary sizes, keeps the classifier with the highest development
    accuracy, and finally evaluates that classifier on the test split.

    Returns a (best_development_accuracy, testing_accuracy) tuple.
    """
    training_documents, dev_documents, test_documents = load_reviews()

    best_development_accuracy = 0.0
    best_num_features = 0
    best_classifier = None
    best_feature_set = None

    # Try increasingly large feature vocabularies.
    for n in [100, 1000, 10000]:
        print("Training with %d features..." % n)

        # Build train/dev datasets; the dev set reuses the training feature set
        # so both are represented in the same feature space.
        training_set = Dataset.from_document_collection(
            training_documents, num_features=n)
        development_set = Dataset.from_document_collection(
            dev_documents, feature_set=training_set.feature_set)

        # Train a fresh classifier for this vocabulary size.
        classifier = PerceptronClassifier.for_dataset(training_set)
        classifier.train(training_set, development_set, num_iterations)

        train_accuracy = classifier.test_accuracy(training_set)
        development_accuracy = classifier.test_accuracy(development_set)

        # Keep a copy of the best-performing classifier so later training
        # rounds cannot mutate it.
        if development_accuracy > best_development_accuracy:
            best_development_accuracy = development_accuracy
            best_num_features = n
            best_classifier = classifier.copy()
            best_feature_set = training_set.feature_set

        print("Best classifier with %d features: \t Train Accuracy: %.4f \t Dev Accuracy: %.4f"
              % (n, train_accuracy, best_development_accuracy))

    print("Best number of features: %d " % best_num_features)
    print("Top features for positive class:")
    print(best_classifier.features_for_class(True))
    print("Top features for negative class:")
    print(best_classifier.features_for_class(False))

    # Evaluate the winning configuration on the held-out test split.
    testing_set = Dataset.from_document_collection(
        test_documents, feature_set=best_feature_set)
    testing_accuracy = best_classifier.test_accuracy(testing_set)
    print("Test score for best setting: %.4f" % testing_accuracy)

    return best_development_accuracy, testing_accuracy
def test_03_prediction_02(self):
    """Verify that the predictions of the classifier are as expected."""
    classifier = PerceptronClassifier({'highly': 1, 'boring': -2})
    # Every instance in the fixture dataset must be predicted with its gold label.
    for inst in self.small_dataset_pred_test_2.instance_list:
        self.assertEqual(classifier.prediction(inst.feature_counts), inst.label)