def test_text_keywords(self):
    # The keywords extracted from the sample text must match the stored
    # reference keywords; both sides are compared as sets of strings, so
    # ordering and duplicates do not matter.
    source_text = get_text_from_test_data("mihalcea_tarau.txt")

    # Keywords produced by the code under test.
    extracted = set(map(str, keywords(source_text, split=True)))

    # Reference keywords, one per line of the fixture file.
    reference_lines = get_text_from_test_data("mihalcea_tarau.kw.txt").split("\n")
    expected = set(map(str, reference_lines))

    self.assertEqual(extracted, expected)
def test_reference_text_summarization(self):
    # The summary generated for the sample text must be equal to the
    # stored reference summary.
    source_text = get_text_from_test_data("mihalcea_tarau.txt")

    # Summary produced by the code under test.
    produced = summarize(source_text)

    # Reference summary to compare against.
    expected = get_text_from_test_data("mihalcea_tarau.summ.txt")

    self.assertEqual(produced, expected)
def test_reference_text_summarization_with_split(self):
    # With split=True the generated summary must match, element by
    # element, the lines of the stored reference summary.
    source_text = get_text_from_test_data("mihalcea_tarau.txt")

    # Summary produced by the code under test, requested in split form.
    produced = summarize(source_text, split=True)

    # Reference summary, broken into its individual lines.
    expected_lines = get_text_from_test_data("mihalcea_tarau.summ.txt").split("\n")

    self.assertSequenceEqual(produced, expected_lines)
def test_text_summarization_on_short_input_text_is_not_empty_string(self):
    # Checks that processing a shortened input text does not yield "".
    text = get_text_from_test_data("unrelated.txt")

    # Keeps the first 8 lines to make the text shorter.
    text = "\n".join(text.split('\n')[:8])

    # NOTE(review): the test name refers to summarization and to an empty
    # string, yet the call below is keywords(..., split=True). If that call
    # returns a list, it can never equal "" and the assertion is vacuous --
    # confirm whether summarize(text) was intended.
    # assertNotEqual replaces the deprecated assertNotEquals alias.
    self.assertNotEqual(keywords(text, split=True), "")
def test_text_summarization_on_single_input_sentence_with_split_is_empty_list(self):
    # Summarizing a single line with split=True must give an empty list.
    text = get_text_from_test_data("unrelated.txt")

    # Keeps only the first line of the fixture.
    text = text.split('\n')[0]

    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(summarize(text, split=True), [])
def test_text_summarization_on_single_input_sentence_is_empty_string(self):
    # Summarizing a single line must give an empty string.
    text = get_text_from_test_data("unrelated.txt")

    # Keeps only the first line of the fixture.
    text = text.split('\n')[0]

    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(summarize(text), "")
def test_keywords_ratio(self):
    # Checks that the ratio parameter is well behaved: the keyword list
    # obtained with ratio=0.4 should be about twice as long as the one
    # obtained with ratio=0.2 (compared to one decimal place). Per the
    # original author's note, lengths are taken on the tokenized clean
    # text, which is why only the proportion is checked.
    source_text = get_text_from_test_data("mihalcea_tarau.txt")

    keywords_at_20 = keywords(source_text, ratio=0.2, split=True)
    keywords_at_40 = keywords(source_text, ratio=0.4, split=True)

    observed_proportion = float(len(keywords_at_40)) / len(keywords_at_20)
    expected_proportion = 0.4 / 0.2

    self.assertAlmostEqual(observed_proportion, expected_proportion, places=1)
def test_corpus_summarization_ratio(self):
    # For each ratio 0.1, 0.2, ..., 0.9 the split summary must contain
    # exactly int(line_count * ratio) items, where line_count is the
    # number of newline-separated lines in the sample text.
    source_text = get_text_from_test_data("mihalcea_tarau.txt")
    line_count = len(source_text.split('\n'))

    for tenths in range(1, 10):
        ratio = tenths / float(10)
        chosen = summarize(source_text, ratio=ratio, split=True)
        self.assertEqual(len(chosen), int(line_count * ratio))
def test_few_distinct_words_summarization_with_split_is_empty_list(self):
    # Summarizing the "few distinct words" fixture with split=True must
    # give an empty list.
    text = get_text_from_test_data("few_distinct_words.txt")

    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(summarize(text, split=True), [])
def test_few_distinct_words_summarization_is_empty_string(self):
    # Summarizing the "few distinct words" fixture must give an empty
    # string.
    text = get_text_from_test_data("few_distinct_words.txt")

    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(summarize(text), "")
def test_keywords_few_distinct_words_is_empty_string(self):
    # Keyword extraction on the "few distinct words" fixture must give an
    # empty string.
    text = get_text_from_test_data("few_distinct_words.txt")

    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(keywords(text), "")
def test_keywords_few_distinct_words_split_is_empty_list(self):
    # Keyword extraction on the "few distinct words" fixture with
    # split=True must give an empty list.
    text = get_text_from_test_data("few_distinct_words.txt")

    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(keywords(text, split=True), [])
def test_repeated_keywords(self):
    # Keyword extraction on the "repeated keywords" fixture must produce
    # at least one line of output.
    source_text = get_text_from_test_data("repeated_keywords.txt")
    extracted = keywords(source_text)

    line_count = len(extracted.splitlines())
    self.assertTrue(line_count)
def test_summary_from_unrelated_sentences_and_split_is_not_empty_list(self):
    # Tests that summarizing a text made of unrelated sentences with
    # split=True does not give an empty list.
    text = get_text_from_test_data("unrelated.txt")

    # assertNotEqual replaces the deprecated assertNotEquals alias.
    self.assertNotEqual(summarize(text, split=True), [])
def test_summary_from_unrelated_sentences_is_not_empty_string(self):
    # Tests that summarizing a text made of unrelated sentences does not
    # give an empty string.
    text = get_text_from_test_data("unrelated.txt")

    # assertNotEqual replaces the deprecated assertNotEquals alias.
    self.assertNotEqual(summarize(text), u"")