def test_percentage_punctuation(self):
    """Verify analyze_text computes punctuation as a fraction of total tokens.

    Edge cases: a lone punctuation token (ratio 1), whitespace-only and
    empty input (ratio 0), plus the shared ``test_text`` fixture.
    """
    test_sentences = [
        ("The cat sat on the hat.", 1.0 / 18),
        (".", 1),
        (" ", 0),
        ("", 0),
        (test_text, 20.0 / 147),
    ]
    for test_sentence, expected in test_sentences:
        analysis = compute_fingerprint.analyze_text(
            util.tokenize_sentences(test_sentence))
        percent_punctuation = analysis['percentage_punctuation']
        # assertEqual, not the deprecated assertEquals alias.
        self.assertEqual(
            percent_punctuation, expected,
            msg=(test_sentence, percent_punctuation, '!=', expected))
 def test_lexical_diversity(self):
     test_sentences = [
         ("The quick brown cat jumped over the lazy dog.", float(8)/9),
         ("", 0),
         (".", 0),
         (" ", 0),
         (test_text, 13.0/48)
     ]
     for test_sentence, result in test_sentences:
         analyse_result_dict = compute_fingerprint.analyze_text(util.tokenize_sentences(test_sentence))
         lexical_diversity = analyse_result_dict['lexical_diversity']
         self.assertEquals(lexical_diversity, result, msg=(test_sentence, lexical_diversity, '!=', result))
 def test_average_word_length(self):
     avg_word_length_cases = [
         ("The quick brown fox jumped over the lazy dog", 4),
         ("", 0),
         (" ", 0),
         (",", 0),
         (test_text, 127.0/48)
     ]
     for test_sentence, result in avg_word_length_cases:
         analyse_result_dict = compute_fingerprint.analyze_text(util.tokenize_sentences(test_sentence))
         avg_word_length = analyse_result_dict['avg_word_length']
         self.assertEquals(avg_word_length, result, msg=(test_sentence, avg_word_length, '!=', result))
 def test_average_sentence_length(self):
     avg_sentence_length_cases = [
         ("The cat sat on the hat. The fox jumped.", 4.5),
         ("", 0),
         ("Alice.", 1),
         ("Mr. Smith went to work.", 5),
         ("'hi,' said Katie.", 3),
         (test_text, 48.0/9)
     ]
     for test_sentence, result in avg_sentence_length_cases:
         analyse_result_dict = compute_fingerprint.analyze_text(util.tokenize_sentences(test_sentence))
         avg_sentence_length = analyse_result_dict['avg_sentence_length']
         self.assertEquals(avg_sentence_length, result, msg=(test_sentence, avg_sentence_length, '!=', result))
    def test_fingerprint_text(self):
        """Verify fingerprint_text yields expected values for every fingerprint field.

        Whitespace-only input must produce an all-zero fingerprint; the shared
        ``test_text`` fixture must match ``seuss_result_dictionary``.
        """
        empty_result = {key: 0 for key in constants.CHUNK_MODEL_FINGERPRINT_FIELDS}
        test_texts = [
            ({'chunk': ' '}, empty_result),
            ({'chunk': test_text}, seuss_result_dictionary),
        ]

        for argument_dictionary, result_dictionary in test_texts:
            fingerprint = compute_fingerprint.fingerprint_text(
                util.tokenize_sentences(argument_dictionary['chunk']))
            # Compare field-by-field lists (in model-field order) so a failure
            # shows exactly which fields diverge.
            expected = [result_dictionary[field]
                        for field in constants.CHUNK_MODEL_FINGERPRINT_FIELDS]
            actual = [fingerprint[field]
                      for field in constants.CHUNK_MODEL_FINGERPRINT_FIELDS]
            # assertEqual, not the deprecated assertEquals alias.
            self.assertEqual(actual, expected,
                             msg=(argument_dictionary['chunk'][:50],
                                  actual, '!=', expected))