Ejemplo n.º 1
0
    def test_text_keywords(self):
        # Keywords extracted from the sample text must match the stored
        # reference keywords, compared as sets (order and duplicates ignored).
        source_text = get_text_from_test_data("mihalcea_tarau.txt")
        extracted = keywords(source_text, split=True)

        # The reference file is split on newlines, one entry per line.
        expected = get_text_from_test_data("mihalcea_tarau.kw.txt").split("\n")

        # Normalise both sides to plain strings before comparing.
        extracted_set = set(map(str, extracted))
        expected_set = set(map(str, expected))
        self.assertEqual(extracted_set, expected_set)
Ejemplo n.º 2
0
    def test_reference_text_summarization(self):
        # The summary produced for the sample text must be exactly equal to
        # the stored reference summary.
        source_text = get_text_from_test_data("mihalcea_tarau.txt")
        actual = summarize(source_text)

        expected = get_text_from_test_data("mihalcea_tarau.summ.txt")
        self.assertEqual(actual, expected)
Ejemplo n.º 3
0
    def test_reference_text_summarization_with_split(self):
        # With split=True the summary is compared, element by element, to the
        # reference summary file split on newlines.
        source_text = get_text_from_test_data("mihalcea_tarau.txt")
        actual_lines = summarize(source_text, split=True)

        expected_lines = get_text_from_test_data("mihalcea_tarau.summ.txt").split("\n")
        self.assertSequenceEqual(actual_lines, expected_lines)
Ejemplo n.º 4
0
    def test_text_summarization_on_short_input_text_is_not_empty_string(self):
        # Keyword extraction on a shortened text should still produce output.
        text = get_text_from_test_data("unrelated.txt")

        # Keeps the first 8 lines (presumably one sentence per line) to make
        # the text shorter.
        text = "\n".join(text.split('\n')[:8])

        # Compare the non-split result against "": the previous check passed
        # split=True, and a split (list) result can never equal "", so the
        # assertion could not fail. Also uses assertNotEqual, since the
        # assertNotEquals alias was removed in Python 3.12.
        self.assertNotEqual(keywords(text), "")
Ejemplo n.º 5
0
    def test_text_summarization_on_single_input_sentence_with_split_is_empty_list(self):
        # Summarizing a single line with split=True is expected to yield [].
        text = get_text_from_test_data("unrelated.txt")

        # Keeps only the first line of the file.
        text = text.split('\n')[0]

        # assertEqual replaces the assertEquals alias removed in Python 3.12.
        self.assertEqual(summarize(text, split=True), [])
Ejemplo n.º 6
0
    def test_text_summarization_on_single_input_sentence_is_empty_string(self):
        # Summarizing a single line is expected to yield the empty string.
        text = get_text_from_test_data("unrelated.txt")

        # Keeps only the first line of the file.
        text = text.split('\n')[0]

        # assertEqual replaces the assertEquals alias removed in Python 3.12.
        self.assertEqual(summarize(text), "")
Ejemplo n.º 7
0
    def test_keywords_ratio(self):
        # Checks that the ratio parameter scales the number of keywords:
        # the result for ratio=0.4 should be about twice as long as the
        # result for ratio=0.2 (compared to one decimal place).
        source_text = get_text_from_test_data("mihalcea_tarau.txt")

        keywords_at_20 = keywords(source_text, ratio=0.2, split=True)
        keywords_at_40 = keywords(source_text, ratio=0.4, split=True)

        # Only the relative growth is checked, not absolute lengths.
        observed_growth = float(len(keywords_at_40)) / len(keywords_at_20)
        expected_growth = 0.4 / 0.2
        self.assertAlmostEqual(observed_growth, expected_growth, places=1)
Ejemplo n.º 8
0
    def test_corpus_summarization_ratio(self):
        # For ratios 0.1 through 0.9, the number of items in the split summary
        # must equal the input line count times the ratio, truncated to int.
        source_text = get_text_from_test_data("mihalcea_tarau.txt")
        line_count = len(source_text.split('\n'))

        for tenths in range(1, 10):
            ratio = tenths / 10.0
            summary_lines = summarize(source_text, ratio=ratio, split=True)
            self.assertEqual(len(summary_lines), int(line_count * ratio))
Ejemplo n.º 9
0
 def test_few_distinct_words_summarization_with_split_is_empty_list(self):
     # Summarizing the few-distinct-words fixture with split=True is expected
     # to yield an empty list.
     text = get_text_from_test_data("few_distinct_words.txt")
     # assertEqual replaces the assertEquals alias removed in Python 3.12.
     self.assertEqual(summarize(text, split=True), [])
Ejemplo n.º 10
0
 def test_few_distinct_words_summarization_is_empty_string(self):
     # Summarizing the few-distinct-words fixture is expected to yield the
     # empty string.
     text = get_text_from_test_data("few_distinct_words.txt")
     # assertEqual replaces the assertEquals alias removed in Python 3.12.
     self.assertEqual(summarize(text), "")
Ejemplo n.º 11
0
 def test_keywords_few_distinct_words_is_empty_string(self):
     # Keyword extraction on the few-distinct-words fixture is expected to
     # yield the empty string.
     text = get_text_from_test_data("few_distinct_words.txt")
     # assertEqual replaces the assertEquals alias removed in Python 3.12.
     self.assertEqual(keywords(text), "")
Ejemplo n.º 12
0
 def test_keywords_few_distinct_words_split_is_empty_list(self):
     # Keyword extraction on the few-distinct-words fixture with split=True
     # is expected to yield an empty list.
     text = get_text_from_test_data("few_distinct_words.txt")
     # assertEqual replaces the assertEquals alias removed in Python 3.12.
     self.assertEqual(keywords(text, split=True), [])
Ejemplo n.º 13
0
    def test_repeated_keywords(self):
        # Keyword extraction on the repeated-keywords fixture must produce at
        # least one line of output.
        source_text = get_text_from_test_data("repeated_keywords.txt")

        keyword_lines = keywords(source_text).splitlines()
        self.assertTrue(len(keyword_lines))
Ejemplo n.º 14
0
 def test_summary_from_unrelated_sentences_and_split_is_not_empty_list(self):
     # Tests that the split summarization of a text with unrelated sentences
     # is not an empty list.
     text = get_text_from_test_data("unrelated.txt")
     # assertNotEqual replaces the assertNotEquals alias removed in Python 3.12.
     self.assertNotEqual(summarize(text, split=True), [])
Ejemplo n.º 15
0
 def test_summary_from_unrelated_sentences_is_not_empty_string(self):
     # Tests that the summarization of a text with unrelated sentences is not
     # the empty string.
     text = get_text_from_test_data("unrelated.txt")
     # assertNotEqual replaces the assertNotEquals alias removed in Python 3.12.
     self.assertNotEqual(summarize(text), u"")