def test_allPredictionsWrong(self):
     # Arrange: build an artificial email classification and hand it,
     # together with invert_classes, to the fixture helper (presumably
     # it stores the truth and the inverted predictions in the corpus).
     truth = create_classification()
     create_truth_and_prediction_file(truth, invert_classes)
     # Act: run the SUT on the prepared corpus directory.
     quality = compute_quality_for_corpus(CORPUS_DIR)
     # Assert: the test expects a quality of exactly zero.
     self.assertEqual(quality, 0.0)
 def test_allPredictionsWrong(self):
     # Arrange: the artificial classification goes straight into the
     # fixture helper along with invert_classes (presumably yielding
     # predictions that are the inverse of the truth).
     create_truth_and_prediction_file(create_classification(), invert_classes)
     # Act: run the SUT while open() is replaced, so that any call
     # without an explicit encoding is rejected.
     with replaced_open():
         quality = self.compute_quality_for_corpus(CORPUS_DIR)
     # Assert: the test expects a quality of exactly zero.
     self.assertEqual(quality, 0.0)
def create_identical_truth_and_prediction_file():
    """
    Write one classification to both !truth.txt and !prediction.txt.

    The corpus directory is expected to exist already; it is not
    created here.
    """
    # A single artificial classification serves as truth and prediction.
    classification = create_classification()
    # Resolve both target paths up front, truth file first.
    targets = [
        os.path.join(CORPUS_DIR, filename)
        for filename in (TRUTH_FILENAME, PREDICTION_FILANAME)
    ]
    # Store the very same dictionary under each path.
    for target in targets:
        save_classification_to_file(classification, target)
 def test_allPredictionsHam_for10SpamsAnd10Hams(self):
     # Prepare the SUT
     # Create an artificial classification of 20 emails, 10 of them spam,
     # and pass it with hams_only to the fixture helper (expected to
     # produce all-ham predictions -- see hams_only).
     truth_dict = create_classification(n_items=20, n_spams=10)
     create_truth_and_prediction_file(truth_dict, hams_only)
     # With 10 spams and 10 hams in the corpus and every prediction ham,
     # the confusion matrix has TN = 10 and FN = 10, and no positives.
     # The modified accuracy then is TN / (TN + FN):
     tn, fn = 10, 10
     expected_q = tn / (tn + fn)
     # Exercise the SUT
     q = compute_quality_for_corpus(CORPUS_DIR)
     # Assertions
     # The quality is a computed float ratio; compare with a tolerance so
     # a correct implementation is not failed over rounding differences.
     self.assertAlmostEqual(q, expected_q)
 def test_allPredictionsHam_for10SpamsAnd20Hams(self):
     # Prepare the SUT
     # Create an artificial classification of 30 emails, 10 of them spam,
     # and pass it with hams_only to the fixture helper (expected to
     # produce all-ham predictions -- see hams_only).
     truth_dict = create_classification(n_items=30, n_spams=10)
     create_truth_and_prediction_file(truth_dict, hams_only)
     # With 10 spams and 20 hams in the corpus and every prediction ham,
     # the confusion matrix has TN = 20 and FN = 10, and no positives.
     # The modified accuracy then is TN / (TN + FN):
     tn, fn = 20, 10
     expected_q = tn / (tn + fn)
     # Exercise the SUT
     with replaced_open():  # Insist on explicit use of encoding
         q = self.compute_quality_for_corpus(CORPUS_DIR)
     # Assertions
     # 20/30 has no exact binary representation, so compare with a
     # tolerance instead of demanding bit-identical floats.
     self.assertAlmostEqual(q, expected_q)
def create_inverse_truth_and_prediction_file():
    """
    Write !truth.txt and an inverted !prediction.txt to the corpus directory.

    The corpus directory is expected to exist already; it is not
    created here.
    """
    # Artificial ground truth, and its counterpart from invert_classes.
    truth = create_classification()
    prediction = invert_classes(truth)
    # Pair each dictionary with the file it belongs in, truth first.
    outputs = (
        (truth, os.path.join(CORPUS_DIR, TRUTH_FILENAME)),
        (prediction, os.path.join(CORPUS_DIR, PREDICTION_FILANAME)),
    )
    for classification, filepath in outputs:
        save_classification_to_file(classification, filepath)
 def test_1FP2FN_for10SpamsAnd20Hams(self):
     # Prepare the SUT
     # Create an artificial classification of 30 emails, 10 of them spam,
     # and pass it with n_FP_n_FN to the fixture helper (expected to
     # produce predictions with 1 FP and 2 FN -- see n_FP_n_FN).
     truth_dict = create_classification(n_items=30, n_spams=10)
     create_truth_and_prediction_file(truth_dict, n_FP_n_FN)
     # With 10 spams and 20 hams in the corpus and all predictions
     # correct except 1 FP and 2 FN, the confusion matrix is
     # FP = 1, FN = 2, TN = 19, TP = 8.
     tp, tn, fp, fn = 8, 19, 1, 2
     # The modified accuracy counts each false positive ten times:
     expected_q = (tp + tn) / (tp + tn + 10 * fp + fn)
     # Exercise the SUT
     with replaced_open():  # Insist on explicit use of encoding
         q = self.compute_quality_for_corpus(CORPUS_DIR)
     # Assertions
     # 27/39 has no exact binary representation, so compare with a
     # tolerance instead of demanding bit-identical floats.
     self.assertAlmostEqual(q, expected_q)