예제 #1
0
    def test_testMethod(self):
        """Check that running test() on the corpus yields the !prediction.txt file.

        The filter's test() method is called on CORPUS_DIR inside the
        replaced_open() context; the outcome is then checked against
        self.file_dict.
        """
        # Act: run the method under test.
        with replaced_open():
            self.filter.test(CORPUS_DIR)

        # Assert: delegate the prediction-file check to the shared helper.
        expected_files = self.file_dict
        self.assertPredictionFileExistsAndContainsClassificationFor(expected_files)
 def test_getClass(self):
     """Check get_class() against the known class of every email."""
     template = 'The expected class of email {} is {}, but {} was observed'
     for email_name, expected in self.true_class.items():
         # Only the call to the corpus runs inside the replaced_open() context.
         with replaced_open():
             observed = self.tc.get_class(email_name)
         failure_note = template.format(email_name, expected, observed)
         self.assertEqual(expected, observed, failure_note)
 def setUp(self):
     """Build a fake corpus together with its !truth.txt file and load it.

     Stores the generated emails in self.email_dict, their classes in
     self.true_class and the TrainingCorpus under test in self.tc.
     """
     # Generate the emails and a classification for their names.
     self.email_dict = create_corpus_dictionary()
     self.true_class = create_classification_for(self.email_dict.keys())
     # Create the corpus directory, then save the classification into it.
     create_corpus_dir_from_dictionary(self.email_dict)
     save_classification_to_file(
         self.true_class,
         fname=os.path.join(CORPUS_DIR, TRUTH_FILENAME))
     # Construct the object under test inside the replaced_open() context.
     with replaced_open():
         self.tc = TrainingCorpus(CORPUS_DIR)
 def test_isHam(self):
     """Check is_ham() for every email whose class is known."""
     template = 'The email {} hamminess: expected {}, observed {}.'
     for email_name, known_class in self.true_class.items():
         # An email is expected to be ham exactly when its class is HAM_TAG.
         should_be_ham = known_class == HAM_TAG
         with replaced_open():
             reported_ham = self.tc.is_ham(email_name)
         failure_note = template.format(
             email_name, str(should_be_ham), str(reported_ham))
         self.assertEqual(should_be_ham, reported_ham, failure_note)
 def test_allPredictionsWrong(self):
     """Quality must be 0.0 when predictions are produced by invert_classes."""
     # Arrange: write the truth file and a prediction file derived from an
     # artificial classification via invert_classes.
     create_truth_and_prediction_file(create_classification(), invert_classes)
     # Act: compute the quality inside the replaced_open() context, which
     # insists on explicit use of encoding.
     with replaced_open():
         quality = self.compute_quality_for_corpus(CORPUS_DIR)
     # Assert
     self.assertEqual(quality, 0.0)
예제 #6
0
 def test_trainMethod(self):
     """Smoke-test train(): the call only has to complete.

     Nothing is asserted about the outcome; the test merely runs the
     method on the corpus.
     """
     # Arrange: train() may read the !truth.txt file, so provide it first.
     self.add_truth_to_corpus()

     # Act: run the method under test inside the replaced_open() context.
     with replaced_open():
         self.filter.train(CORPUS_DIR)
예제 #7
0
 def test_corpusContainsOnlyEmails(self):
     """Check emails() on a corpus that holds email messages only."""
     corpus = Corpus(CORPUS_DIR)
     # Act: gather everything emails() yields, keyed by file name.
     with replaced_open():
         observed = {name: body for name, body in corpus.emails()}
     # Assert: compare the number of files first, then the contents.
     self.assertEqual(
         len(self.expected), len(observed),
         'The emails() method did not generate all the corpus files.')
     self.assertEqual(
         self.expected, observed,
         'The read file contents are not equal to the expected contents.')
예제 #8
0
 def test_trainAndTest(self):
     """Run train() followed by test() on the same corpus, as in real use."""
     # Arrange: train() may read the !truth.txt file, so provide it first.
     self.add_truth_to_corpus()

     # Act: the whole train/test sequence runs inside replaced_open().
     with replaced_open():
         self.filter.train(CORPUS_DIR)
         # The truth file is removed before test() is called.
         os.remove(self.truth_filepath)
         self.filter.test(CORPUS_DIR)

     # Assert: delegate the prediction-file check to the shared helper.
     expected_files = self.file_dict
     self.assertPredictionFileExistsAndContainsClassificationFor(expected_files)
     
 def test_allPredictionsHam_for10SpamsAnd20Hams(self):
     """Check the quality when every email is predicted as ham.

     The corpus has 30 items, 10 of them spam, and predictions are
     produced by hams_only.
     """
     # Prepare the SUT
     # Create an artificial email classification dictionary
     truth_dict = create_classification(n_items=30, n_spams=10)
     create_truth_and_prediction_file(truth_dict, hams_only)
     # Since there are 10 spams and 20 hams in the corpus,
     # and predictions are all ham, then the confusion matrix
     # shall have TN = 20 and FN = 10, zero positives.
     # The modified accuracy then is:
     expected_q = 20 / (20 + 10)
     # Exercise the SUT
     with replaced_open():  # Insist on explicit use of encoding
         q = self.compute_quality_for_corpus(CORPUS_DIR)
     # Assertions
     # 2/3 has no exact float representation, so compare approximately
     # rather than requiring bit-identical results from the SUT.
     self.assertAlmostEqual(q, expected_q)
 def test_1FP2FN_for10SpamsAnd20Hams(self):
     """Check the quality for 1 false positive and 2 false negatives.

     The corpus has 30 items, 10 of them spam, and predictions are
     produced by n_FP_n_FN.
     """
     # Prepare the SUT
     # Create an artificial email classification dictionary
     truth_dict = create_classification(n_items=30, n_spams=10)
     create_truth_and_prediction_file(truth_dict, n_FP_n_FN)
     # Since there are 10 spams and 20 hams in the corpus,
     # and predictions are all correct except 1 FP and 2 FN,
     # the confusion matrix shall contain these counts:
     tp, tn, fp, fn = 8, 19, 1, 2
     # The modified accuracy weights each false positive ten times:
     expected_q = (tp + tn) / (tp + tn + 10 * fp + fn)
     # Exercise the SUT
     with replaced_open():  # Insist on explicit use of encoding
         q = self.compute_quality_for_corpus(CORPUS_DIR)
     # Assertions
     # 27/39 has no exact float representation, so compare approximately
     # rather than requiring bit-identical results from the SUT.
     self.assertAlmostEqual(q, expected_q)
예제 #11
0
 def test_hams(self):
     """Check that hams() yields every ham, and only hams, with its contents."""
     observed_count = 0
     with replaced_open():
         # Numbering from 1 leaves the number of yielded items in
         # observed_count after the loop; it stays 0 if nothing is yielded.
         for observed_count, (name, body) in enumerate(self.tc.hams(), start=1):
             self.assertEqual(self.true_class[name], HAM_TAG,
                              'Spam email returned by hams() method.')
             self.assertEqual(
                 self.email_dict[name], body,
                 'The read file contents are not equal to the expected contents.'
             )
     # The expected count is the number of HAM_TAG entries in true_class.
     expected_count = Counter(self.true_class.values())[HAM_TAG]
     self.assertEqual(
         expected_count, observed_count,
         'The hams() method did not return the right number of hams.')
예제 #12
0
 def test_corpusContainsAlsoSpecialFiles(self):
     """Check emails() on a corpus that also contains a special file."""
     # Arrange: save one special file into the corpus directory.
     save_file_to_corpus_dir(
         fname=SPECIAL_FILENAME, contents='fake', dirname=CORPUS_DIR)
     corpus = Corpus(CORPUS_DIR)
     # Act: gather everything emails() yields, keyed by file name.
     with replaced_open():
         observed = {name: body for name, body in corpus.emails()}
     # Assert: the result must still match self.expected in size and content.
     self.assertEqual(
         len(self.expected), len(observed),
         'The emails() method did not generate all the corpus files.')
     self.assertEqual(
         self.expected, observed,
         'The read file contents are not equal to the expected contents.')