def test_percentage_punctuation(self):
    test_sentences = [
        ("The cat sat on the hat.", 1.0 / 18),
        (".", 1),
        (" ", 0),
        ("", 0),
        (test_text, 20.0 / 147),
    ]
    for test_sentence, result in test_sentences:
        analyse_result_dict = compute_fingerprint.analyze_text(util.tokenize_sentences(test_sentence))
        percent_punctuation = analyse_result_dict['percentage_punctuation']
        self.assertEqual(percent_punctuation, result,
                         msg=(test_sentence, percent_punctuation, '!=', result))
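
# A minimal sketch, not compute_fingerprint's actual implementation: the expected
# values above are consistent with counting punctuation characters over all
# non-whitespace characters, e.g. "The cat sat on the hat." has 1 punctuation mark
# out of 18 characters. The helper name `_punctuation_ratio` is illustrative only.
def _punctuation_ratio(text):
    import string
    # Ignore whitespace so " " and "" both yield 0.
    chars = [c for c in text if not c.isspace()]
    if not chars:
        return 0
    return float(sum(c in string.punctuation for c in chars)) / len(chars)
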
def test_lexical_diversity(self):
    test_sentences = [
        ("The quick brown cat jumped over the lazy dog.", 8.0 / 9),
        ("", 0),
        (".", 0),
        (" ", 0),
        (test_text, 13.0 / 48),
    ]
    for test_sentence, result in test_sentences:
        analyse_result_dict = compute_fingerprint.analyze_text(util.tokenize_sentences(test_sentence))
        lexical_diversity = analyse_result_dict['lexical_diversity']
        self.assertEqual(lexical_diversity, result,
                         msg=(test_sentence, lexical_diversity, '!=', result))
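
# A minimal sketch, not the project's implementation: the expected values above
# match "distinct words / total words", case-insensitive and ignoring punctuation
# tokens, e.g. 8 distinct words out of 9 in the first case because "the" repeats.
# The helper name `_lexical_diversity` is illustrative only.
def _lexical_diversity(words):
    # `words` is assumed to be a flat list of word tokens with punctuation removed.
    if not words:
        return 0
    lowered = [w.lower() for w in words]
    return float(len(set(lowered))) / len(lowered)
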
def test_average_word_length(self):
    avg_word_length_cases = [
        ("The quick brown fox jumped over the lazy dog", 4),
        ("", 0),
        (" ", 0),
        (",", 0),
        (test_text, 127.0 / 48),
    ]
    for test_sentence, result in avg_word_length_cases:
        analyse_result_dict = compute_fingerprint.analyze_text(util.tokenize_sentences(test_sentence))
        avg_word_length = analyse_result_dict['avg_word_length']
        self.assertEqual(avg_word_length, result,
                         msg=(test_sentence, avg_word_length, '!=', result))
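
# A minimal sketch, not the project's implementation: the expected values above
# match "total characters across words / number of words", e.g. 36 letters over
# 9 words = 4 for the first case. The helper name `_avg_word_length` is
# illustrative only.
def _avg_word_length(words):
    # `words` is assumed to be a flat list of word tokens with punctuation removed.
    if not words:
        return 0
    return float(sum(len(w) for w in words)) / len(words)
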
def test_average_sentence_length(self):
    avg_sentence_length_cases = [
        ("The cat sat on the hat. The fox jumped.", 4.5),
        ("", 0),
        ("Alice.", 1),
        ("Mr. Smith went to work.", 5),
        ("'hi,' said Katie.", 3),
        (test_text, 48.0 / 9),
    ]
    for test_sentence, result in avg_sentence_length_cases:
        analyse_result_dict = compute_fingerprint.analyze_text(util.tokenize_sentences(test_sentence))
        avg_sentence_length = analyse_result_dict['avg_sentence_length']
        self.assertEqual(avg_sentence_length, result,
                         msg=(test_sentence, avg_sentence_length, '!=', result))
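
# A minimal sketch, not the project's implementation: the expected values above
# match "total words / number of sentences" with punctuation excluded from the
# word count, e.g. (6 + 3) words over 2 sentences = 4.5 in the first case. The
# helper name `_avg_sentence_length` is illustrative only.
def _avg_sentence_length(sentences):
    # `sentences` is assumed to be a list of word-token lists, one per sentence.
    if not sentences:
        return 0
    total_words = sum(len(sentence) for sentence in sentences)
    return float(total_words) / len(sentences)
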
def test_fingerprint_text(self):
    """Check that fingerprint_text populates every fingerprint field."""
    empty_result = {key: 0 for key in constants.CHUNK_MODEL_FINGERPRINT_FIELDS}
    test_texts = [
        ({'chunk': ' '}, empty_result),
        ({'chunk': test_text}, seuss_result_dictionary),
    ]
    for argument_dictionary, result_dictionary in test_texts:
        result_list = []
        fingerprint_list = []
        fingerprint = compute_fingerprint.fingerprint_text(util.tokenize_sentences(argument_dictionary['chunk']))
        for field in constants.CHUNK_MODEL_FINGERPRINT_FIELDS:
            result_list.append(result_dictionary[field])
            fingerprint_list.append(fingerprint[field])
        self.assertEqual(fingerprint_list, result_list)