def test_key_empty():
    """An empty document produces no key-method sentences."""
    summarizer = EdmundsonSummarizer()
    summarizer.bonus_words = ("ba", "bb", "bc")

    selected = summarizer.key_method(build_document(), 10)

    assert [to_unicode(s) for s in selected] == []
def test_location_method_2(self):
    """Location method with both paragraph weights zeroed ranks sentences
    purely by heading-word occurrences."""
    document = build_document_from_string("""
# na nb nc ha hb
ha = 1 + 1 + 0 = 2
middle = 0
ha hb = 2 + 1 + 0 = 3
first = 1
ha hb ha = 3
last = 1

# hc hd
hb hc hd = 3 + 1 + 0 = 4
ha hb = 2 + 1 + 0 = 3
""")
    summarizer = EdmundsonSummarizer()
    summarizer.null_words = ("na", "nb", "nc", "nd", "ne")

    sentences = summarizer.location_method(document, 4, w_p1=0, w_p2=0)

    expected = (
        "ha hb = 2 + 1 + 0 = 3",
        "ha hb ha = 3",
        "hb hc hd = 3 + 1 + 0 = 4",
        "ha hb = 2 + 1 + 0 = 3",
    )
    self.assertEqual(len(sentences), len(expected))
    for sentence, text in zip(sentences, expected):
        self.assertEqual(to_unicode(sentence), text)
def test_cue_3(self):
    """Cue method orders sentences by bonus- vs. stigma-word counts."""
    document = build_document(
        (
            "ba " * 10,
            "bb " * 10,
            " sa" * 8 + " bb" * 10,
            "bb bc ba",
        ),
        (),
        (
            "babbbc " * 10,
            "na nb nc nd sa" + " bc" * 10,
            " ba n" * 10,
        ),
    )
    summarizer = EdmundsonSummarizer()
    summarizer.bonus_words = ("ba", "bb", "bc")
    summarizer.stigma_words = ("sa", "sb", "sc")

    sentences = summarizer.cue_method(document, 5)

    expected = (
        ("ba " * 10).strip(),
        ("bb " * 10).strip(),
        "bb bc ba",
        "na nb nc nd sa bc bc bc bc bc bc bc bc bc bc",
        ("ba n " * 10).strip(),
    )
    self.assertEqual(len(sentences), len(expected))
    for sentence, text in zip(sentences, expected):
        self.assertEqual(to_unicode(sentence), text)
def test_location_method_2():
    """Location method with zeroed paragraph weights ranks by heading words."""
    document = build_document_from_string("""
# na nb nc ha hb
ha = 1 + 1 + 0 = 2
middle = 0
ha hb = 2 + 1 + 0 = 3
first = 1
ha hb ha = 3
last = 1

# hc hd
hb hc hd = 3 + 1 + 0 = 4
ha hb = 2 + 1 + 0 = 3
""")
    summarizer = EdmundsonSummarizer()
    summarizer.null_words = ("na", "nb", "nc", "nd", "ne")

    selected = summarizer.location_method(document, 4, w_p1=0, w_p2=0)

    assert [to_unicode(s) for s in selected] == [
        "ha hb = 2 + 1 + 0 = 3",
        "ha hb ha = 3",
        "hb hc hd = 3 + 1 + 0 = 4",
        "ha hb = 2 + 1 + 0 = 3",
    ]
def edmunson(self, text):
    """Summarize *text* with sumy's summarizer, keeping a percentage of
    the document's sentences taken from the GUI slider (``self.scale``).

    :param text: plain text to summarize (German)
    :return: the summary as a single string (leading space preserved,
        matching the original concatenation behaviour)
    """
    # Select the language.
    language = "german"
    # Percentage from the slider, expressed as a divisor
    # (e.g. scale=25 -> divisor 4 -> keep a quarter of the sentences).
    divisor = 100 / self.scale.get()

    # Tokenize the text and attach a stemmer to the summarizer.
    parser = PlaintextParser.from_string(text, Tokenizer(language))
    stemmer = Stemmer(language)
    summarizer = Summarizer(stemmer)

    # Specific word lists: bonus/stigma/null words are not meant to be
    # used here, but the summarizer rejects empty input, so feed it
    # junk placeholder tokens that will never match real words.
    summarizer.stop_words = get_stop_words(language)
    summarizer.bonus_words = ["nsdgdf"]
    summarizer.stigma_words = ["mtrtf"]
    summarizer.null_words = ["zngg"]

    # Count the document's sentences by requesting far more than exist.
    count = sum(1 for _ in summarizer(parser.document, 10000000000))

    # Derive the number of summary sentences from the percentage.
    sentence_number = round(count / divisor)

    # Join the selected sentences into one text (O(n) join instead of
    # the previous quadratic string concatenation).
    return "".join(
        " " + str(sentence)
        for sentence in summarizer(parser.document, sentence_number)
    )
def test_cue_3():
    """Cue method orders sentences by bonus- vs. stigma-word counts."""
    document = build_document(
        (
            "ba " * 10,
            "bb " * 10,
            " sa" * 8 + " bb" * 10,
            "bb bc ba",
        ),
        (),
        (
            "babbbc " * 10,
            "na nb nc nd sa" + " bc" * 10,
            " ba n" * 10,
        ),
    )
    summarizer = EdmundsonSummarizer()
    summarizer.bonus_words = ("ba", "bb", "bc")
    summarizer.stigma_words = ("sa", "sb", "sc")

    selected = summarizer.cue_method(document, 5)

    assert [to_unicode(s) for s in selected] == [
        ("ba " * 10).strip(),
        ("bb " * 10).strip(),
        "bb bc ba",
        "na nb nc nd sa bc bc bc bc bc bc bc bc bc bc",
        ("ba n " * 10).strip(),
    ]
def test_cue_empty(self):
    """Cue method on an empty document selects nothing."""
    empty_document = build_document()

    summarizer = EdmundsonSummarizer()
    summarizer.bonus_words = ("ba", "bb", "bc")
    summarizer.stigma_words = ("sa", "sb", "sc")

    selected = summarizer.cue_method(empty_document, 10)

    self.assertEqual(len(selected), 0)
def test_key_3():
    """Key method ranks by bonus-word frequency; weight=0 changes the order."""
    document = build_document(
        ("wa", "wa wa", "wa wa wa", "wa wa wa wa", "wa Wa Wa Wa wa"),
        ("x X x X",),
    )
    summarizer = EdmundsonSummarizer()
    summarizer.bonus_words = ("wa", "X")

    selected = summarizer.key_method(document, 3)
    assert [to_unicode(s) for s in selected] == [
        "wa wa wa",
        "wa wa wa wa",
        "wa Wa Wa Wa wa",
    ]

    selected = summarizer.key_method(document, 3, weight=0)
    assert [to_unicode(s) for s in selected] == [
        "wa wa wa wa",
        "wa Wa Wa Wa wa",
        "x X x X",
    ]
def test_title_method_2():
    """Title method prefers sentences that share words with their heading."""
    document = build_document_from_string("""
# This is cool heading
Because I am sentence I like words
And because I am string I like characters

# blank and heading
This is next paragraph because of blank line above
Here is the winner because contains words like cool and heading
""")
    summarizer = EdmundsonSummarizer()
    summarizer.null_words = ("this", "is", "I", "am", "and")

    selected = summarizer.title_method(document, 2)

    assert [to_unicode(s) for s in selected] == [
        "This is next paragraph because of blank line above",
        "Here is the winner because contains words like cool and heading",
    ]
def test_mixed_cue_key(self):
    """Combined cue+key weighting picks the bonus-word-rich sentences."""
    document = build_document_from_string("""
# This is cool heading
Because I am sentence I like words
And because I am string I like characters

# blank and heading
This is next paragraph because of blank line above
Here is the winner because contains words like cool and heading
""")
    summarizer = EdmundsonSummarizer(
        cue_weight=1, key_weight=1, title_weight=0, location_weight=0)
    summarizer.bonus_words = (
        "cool", "heading", "sentence", "words", "like", "because")
    summarizer.stigma_words = ("this", "is", "I", "am", "and")

    sentences = summarizer(document, 2)

    expected = (
        "Because I am sentence I like words",
        "Here is the winner because contains words like cool and heading",
    )
    self.assertEqual(len(sentences), len(expected))
    for sentence, text in zip(sentences, expected):
        self.assertEqual(to_unicode(sentence), text)
def test_location_method_with_empty_document():
    """Location method over an empty document returns nothing."""
    summarizer = EdmundsonSummarizer()
    summarizer.null_words = ("na", "nb", "nc")

    selected = summarizer.location_method(build_document(), 10)

    assert [to_unicode(s) for s in selected] == []
def test_location_method_2():
    """Location method with zeroed paragraph weights ranks by heading words."""
    document = build_document_from_string("""
# na nb nc ha hb
ha = 1 + 1 + 0 = 2
middle = 0
ha hb = 2 + 1 + 0 = 3
first = 1
ha hb ha = 3
last = 1

# hc hd
hb hc hd = 3 + 1 + 0 = 4
ha hb = 2 + 1 + 0 = 3
""")
    summarizer = EdmundsonSummarizer()
    summarizer.null_words = ("na", "nb", "nc", "nd", "ne")

    sentences = summarizer.location_method(document, 4, w_p1=0, w_p2=0)

    expected = [
        "ha hb = 2 + 1 + 0 = 3",
        "ha hb ha = 3",
        "hb hc hd = 3 + 1 + 0 = 4",
        "ha hb = 2 + 1 + 0 = 3",
    ]
    assert [to_unicode(s) for s in sentences] == expected
def test_location_method_2(self):
    """Location method ignores paragraph-position bonuses when w_p1=w_p2=0."""
    document = build_document_from_string("""
# na nb nc ha hb
ha = 1 + 1 + 0 = 2
middle = 0
ha hb = 2 + 1 + 0 = 3
first = 1
ha hb ha = 3
last = 1

# hc hd
hb hc hd = 3 + 1 + 0 = 4
ha hb = 2 + 1 + 0 = 3
""")
    summarizer = EdmundsonSummarizer()
    summarizer.null_words = ("na", "nb", "nc", "nd", "ne")

    sentences = summarizer.location_method(document, 4, w_p1=0, w_p2=0)

    self.assertEqual(
        [to_unicode(s) for s in sentences],
        [
            "ha hb = 2 + 1 + 0 = 3",
            "ha hb ha = 3",
            "hb hc hd = 3 + 1 + 0 = 4",
            "ha hb = 2 + 1 + 0 = 3",
        ],
    )
def test_key_3(self):
    """Key method ranks by bonus-word frequency; weight=0 changes the order."""
    document = build_document(
        ("wa", "wa wa", "wa wa wa", "wa wa wa wa", "wa Wa Wa Wa wa"),
        ("x X x X",),
    )
    summarizer = EdmundsonSummarizer()
    summarizer.bonus_words = ("wa", "X")

    sentences = summarizer.key_method(document, 3)
    self.assertEqual(
        [to_unicode(s) for s in sentences],
        ["wa wa wa", "wa wa wa wa", "wa Wa Wa Wa wa"],
    )

    sentences = summarizer.key_method(document, 3, weight=0)
    self.assertEqual(
        [to_unicode(s) for s in sentences],
        ["wa wa wa wa", "wa Wa Wa Wa wa", "x X x X"],
    )
def test_cue_3(self):
    """Cue method scores across several paragraphs of repeated cue words."""
    first_paragraph = (
        "ba " * 10,
        "bb " * 10,
        " sa" * 8 + " bb" * 10,
        "bb bc ba",
    )
    last_paragraph = (
        "babbbc " * 10,
        "na nb nc nd sa" + " bc" * 10,
        " ba n" * 10,
    )
    document = build_document(first_paragraph, (), last_paragraph)

    summarizer = EdmundsonSummarizer()
    summarizer.bonus_words = ("ba", "bb", "bc")
    summarizer.stigma_words = ("sa", "sb", "sc")

    sentences = summarizer.cue_method(document, 5)

    self.assertEqual(
        [to_unicode(s) for s in sentences],
        [
            ("ba " * 10).strip(),
            ("bb " * 10).strip(),
            "bb bc ba",
            "na nb nc nd sa bc bc bc bc bc bc bc bc bc bc",
            ("ba n " * 10).strip(),
        ],
    )
def test_title_method_3(self):
    """Requesting three sentences returns all heading-word matches in order."""
    document = build_document_from_string("""
# This is cool heading
Because I am sentence I like words
And because I am string I like characters

# blank and heading
This is next paragraph because of blank line above
Here is the winner because contains words like cool and heading
""")
    summarizer = EdmundsonSummarizer()
    summarizer.null_words = ("this", "is", "I", "am", "and")

    sentences = summarizer.title_method(document, 3)

    expected = (
        "Because I am sentence I like words",
        "This is next paragraph because of blank line above",
        "Here is the winner because contains words like cool and heading",
    )
    self.assertEqual(len(sentences), len(expected))
    for sentence, text in zip(sentences, expected):
        self.assertEqual(to_unicode(sentence), text)
def test_null_words_property():
    """null_words starts empty and stores assigned words as a frozenset."""
    summarizer = EdmundsonSummarizer()
    assert summarizer.null_words == frozenset()

    words = ("word", "another", "and", "some", "next")
    summarizer.null_words = words

    assert summarizer.null_words == frozenset(words)
def test_null_words_property(self):
    """null_words starts empty and is exposed as a frozenset of the words."""
    summarizer = EdmundsonSummarizer()
    self.assertEqual(summarizer.null_words, frozenset())

    words = ("word", "another", "and", "some", "next")
    summarizer.null_words = words

    self.assertIsInstance(summarizer.null_words, frozenset)
    self.assertEqual(summarizer.null_words, frozenset(words))
def test_cue_with_no_stigma_words(self):
    """Cue method requires stigma words to be configured."""
    summarizer = EdmundsonSummarizer()
    summarizer.bonus_words = ("great", "very", "beautiful")

    with self.assertRaises(ValueError):
        summarizer.cue_method(build_document(), 10)
def test_cue_with_no_stigma_words():
    """Cue method raises ValueError when stigma words were never set."""
    summarizer = EdmundsonSummarizer()
    summarizer.bonus_words = ("great", "very", "beautiful")

    # Callable form of pytest.raises: same check, different spelling.
    pytest.raises(ValueError, summarizer.cue_method, build_document(), 10)
def test_location_method_with_empty_document(self):
    """Location method over an empty document selects nothing."""
    empty_document = build_document()

    summarizer = EdmundsonSummarizer()
    summarizer.null_words = ("na", "nb", "nc")

    selected = summarizer.location_method(empty_document, 10)

    self.assertEqual(len(selected), 0)
def test_cue_with_no_bonus_words():
    """Cue method raises ValueError when bonus words were never set."""
    summarizer = EdmundsonSummarizer()
    summarizer.stigma_words = ("useless", "bad", "spinach")

    # Callable form of pytest.raises: same check, different spelling.
    pytest.raises(ValueError, summarizer.cue_method, build_document(), 10)
def test_cue_with_no_bonus_words(self):
    """Cue method requires bonus words to be configured."""
    summarizer = EdmundsonSummarizer()
    summarizer.stigma_words = ("useless", "bad", "spinach")

    with self.assertRaises(ValueError):
        summarizer.cue_method(build_document(), 10)
def test_title_method_with_empty_document():
    """Title method over an empty document returns nothing."""
    summarizer = EdmundsonSummarizer()
    summarizer.null_words = ("ba", "bb", "bc")

    selected = summarizer.title_method(build_document(), 10)

    assert [to_unicode(s) for s in selected] == []
def test_title_method_with_empty_document(self):
    """Title method over an empty document selects nothing."""
    empty_document = build_document()

    summarizer = EdmundsonSummarizer()
    summarizer.null_words = ("ba", "bb", "bc")

    selected = summarizer.title_method(empty_document, 10)

    self.assertEqual(len(selected), 0)
def test_key_empty(self):
    """Key method on an empty document selects nothing."""
    empty_document = build_document()

    summarizer = EdmundsonSummarizer()
    summarizer.bonus_words = ("ba", "bb", "bc")

    selected = summarizer.key_method(empty_document, 10)

    self.assertEqual(len(selected), 0)
def __summarize(self, parser):
    """Run the Edmundson summarizer over *parser*'s document and join
    the selected sentences into a single string.

    :param parser: a sumy parser exposing ``.document``
    :return: the joined summary text
    """
    summarizer = EdmundsonSummarizer(Stemmer(self.__language))
    # Words of high importance.
    summarizer.bonus_words = ('info', 'information', 'due', 'overdue',
                              'withdraw', 'balance', 'fee', 'letter',
                              'compliance', 'super')
    # Stigma/null words are unused here, but the summarizer rejects an
    # empty collection, so a junk placeholder token is supplied.
    # BUG FIX: the placeholder was previously assigned as a bare string,
    # which the frozenset-converting setter split into individual
    # *characters* ('z', 'd', 'f', ...) that could accidentally match
    # real one-letter tokens; wrap it in a tuple so it stays one word.
    summarizer.stigma_words = ('zdfgthdvndadv',)
    summarizer.null_words = ('zdfgthdvndadv',)
    final_sentences = summarizer(parser.document, self.__sentences_count)
    return self.__join_sentences(final_sentences)
def test_key_empty():
    """Key method yields nothing for a document with no sentences."""
    summarizer = EdmundsonSummarizer()
    summarizer.bonus_words = ("ba", "bb", "bc")

    result = summarizer.key_method(build_document(), 10)

    assert [to_unicode(sentence) for sentence in result] == []
def test_cue_1(self):
    """A single sentence containing cue words is selected."""
    document = build_document(("ba bb bc bb unknown ľščťžýáíé sb sc sb",))

    summarizer = EdmundsonSummarizer()
    summarizer.bonus_words = ("ba", "bb", "bc")
    summarizer.stigma_words = ("sa", "sb", "sc")

    selected = summarizer.cue_method(document, 10)

    self.assertEqual(len(selected), 1)
def test_location_method_with_empty_document():
    """Location method yields nothing for a document with no sentences."""
    summarizer = EdmundsonSummarizer()
    summarizer.null_words = ("na", "nb", "nc")

    result = summarizer.location_method(build_document(), 10)

    assert [to_unicode(sentence) for sentence in result] == []
def test_key_1(self):
    """The sentence containing a bonus word wins the key method."""
    document = build_document(
        ("wa wb wc wd", "I like music"),
        ("This is test sentence with some extra words and bonus",),
    )
    summarizer = EdmundsonSummarizer()
    summarizer.bonus_words = ("ba", "bb", "bc", "bonus")

    sentences = summarizer.key_method(document, 1)

    self.assertEqual(len(sentences), 1)
    self.assertEqual(
        to_unicode(sentences[0]),
        "This is test sentence with some extra words and bonus")
def test_key_1():
    """The sentence containing a bonus word wins the key method."""
    document = build_document(
        ("wa wb wc wd", "I like music"),
        ("This is test sentence with some extra words and bonus",),
    )
    summarizer = EdmundsonSummarizer()
    summarizer.bonus_words = ("ba", "bb", "bc", "bonus")

    selected = summarizer.key_method(document, 1)

    assert [to_unicode(s) for s in selected] == [
        "This is test sentence with some extra words and bonus",
    ]
def test_key_2(self):
    """Sentences with the most bonus-word occurrences are selected."""
    document = build_document(
        ("Om nom nom nom nom", "Sure I summarize it, with bonus"),
        ("This is bonus test sentence with some extra words and bonus",),
    )
    summarizer = EdmundsonSummarizer()
    summarizer.bonus_words = ("nom", "bonus")

    sentences = summarizer.key_method(document, 2)

    self.assertEqual(
        [to_unicode(s) for s in sentences],
        [
            "Om nom nom nom nom",
            "This is bonus test sentence with some extra words and bonus",
        ],
    )
def test_key_2():
    """Sentences with the most bonus-word occurrences are selected."""
    document = build_document(
        ("Om nom nom nom nom", "Sure I summarize it, with bonus"),
        ("This is bonus test sentence with some extra words and bonus",),
    )
    summarizer = EdmundsonSummarizer()
    summarizer.bonus_words = ("nom", "bonus")

    selected = summarizer.key_method(document, 2)

    assert [to_unicode(s) for s in selected] == [
        "Om nom nom nom nom",
        "This is bonus test sentence with some extra words and bonus",
    ]
def test_title_method_without_title(self):
    """Without headings every sentence is returned in document order."""
    document = build_document(
        ("This is sentence", "This is another one"),
        ("And some next sentence but no heading",),
    )
    summarizer = EdmundsonSummarizer()
    summarizer.null_words = ("this", "is", "some", "and")

    sentences = summarizer.title_method(document, 10)

    expected = (
        "This is sentence",
        "This is another one",
        "And some next sentence but no heading",
    )
    self.assertEqual(len(sentences), len(expected))
    for sentence, text in zip(sentences, expected):
        self.assertEqual(to_unicode(sentence), text)
def test_cue_letters_case(self):
    """Bonus-word matching ignores letter case; longer sentences win."""
    document = build_document(
        ("X X X", "x x x x"),
        ("w w w", "W W W W"),
    )
    summarizer = EdmundsonSummarizer()
    summarizer.bonus_words = ("X", "w")
    summarizer.stigma_words = ("stigma",)

    sentences = summarizer.cue_method(document, 2)

    self.assertEqual(
        [to_unicode(s) for s in sentences],
        ["x x x x", "W W W W"],
    )
def Edmundson(rsc_file, dst_file, count):
    """Summarize a Chinese text file with sumy's Edmundson summarizer and
    write the selected sentences to *dst_file*, one per line.

    :param rsc_file: path of the UTF-8 source text file
    :param dst_file: path of the output file
    :param count: number of sentences to keep
    """
    language = "chinese"
    parser = PlaintextParser.from_file(rsc_file, Tokenizer(language), encoding='utf-8')
    stemmer = Stemmer(language)  # stemmer for the chosen language ("language container")
    # Edmundson summarizer (the original comment said "Luhn algorithm",
    # which is misleading — this is EdmundsonSummarizer).
    # NOTE(review): no bonus/stigma/null words are configured; sumy's
    # Edmundson summarizer typically requires them — confirm this call
    # does not raise at runtime.
    summarizer = EdmundsonSummarizer(stemmer)
    summarizer.stop_words = get_stop_words(language)
    # Write each selected sentence on its own line and echo it to stdout.
    with open(dst_file, 'w', encoding='utf-8') as f:
        for sentence in summarizer(parser.document, count):
            f.write(str(sentence))
            f.write('\n')
            print(sentence)
def test_title_method_without_title():
    """Without headings every sentence is returned in document order."""
    document = build_document(
        ("This is sentence", "This is another one"),
        ("And some next sentence but no heading",),
    )
    summarizer = EdmundsonSummarizer()
    summarizer.null_words = ("this", "is", "some", "and")

    selected = summarizer.title_method(document, 10)

    assert [to_unicode(s) for s in selected] == [
        "This is sentence",
        "This is another one",
        "And some next sentence but no heading",
    ]
def test_null_words_property(self):
    """Assigned null words are exposed as a frozenset; default is empty."""
    summarizer = EdmundsonSummarizer()
    self.assertEqual(summarizer.null_words, frozenset())

    assigned = ("word", "another", "and", "some", "next")
    summarizer.null_words = assigned

    self.assertIsInstance(summarizer.null_words, frozenset)
    self.assertEqual(summarizer.null_words, frozenset(assigned))
def test_null_words_property():
    """Assigned null words are exposed as a frozenset; default is empty."""
    summarizer = EdmundsonSummarizer()
    assert summarizer.null_words == frozenset()

    assigned = ("word", "another", "and", "some", "next")
    summarizer.null_words = assigned

    assert summarizer.null_words == frozenset(assigned)
def summarize_with_info(self, corpus, length, algorithm):
    """Summarize *corpus* and return the selected sentence objects.

    :param corpus: raw text to summarize
    :param length: number of sentences to select
    :param algorithm: one of "textrank", "lexrank", "luhn", "edmundson",
        "kl", "lsa", "sumbasic", "random"
    :return: sentence objects chosen by the selected sumy summarizer
    :raises NotImplementedError: for an unknown *algorithm*
    """
    parser = PlaintextParser.from_string(corpus, Tokenizer(self.LANGUAGE))
    stemmer = Stemmer(self.LANGUAGE)

    # Dispatch to the requested sumy summarizer implementation.
    if algorithm == "textrank":
        summarizer = TextRankSummarizer(stemmer)
    elif algorithm == "lexrank":
        summarizer = LexRankSummarizer(stemmer)
    elif algorithm == "luhn":
        summarizer = LuhnSummarizer(stemmer)
    elif algorithm == "edmundson":
        # Edmundson needs explicit word lists; derive them from the parser.
        summarizer = EdmundsonSummarizer(stemmer)
        summarizer.bonus_words = parser.significant_words
        summarizer.stigma_words = parser.stigma_words
    elif algorithm == "kl":
        summarizer = KLSummarizer(stemmer)
    elif algorithm == "lsa":
        summarizer = LsaSummarizer(stemmer)
    elif algorithm == "sumbasic":
        summarizer = SumBasicSummarizer(stemmer)
    elif algorithm == "random":
        summarizer = RandomSummarizer(stemmer)
    else:
        # BUG FIX: ``raise NotImplemented(...)`` raised a TypeError because
        # NotImplemented is a constant, not an exception class.
        raise NotImplementedError("Summary algorithm is not available")

    summarizer.stop_words = get_stop_words(self.LANGUAGE)
    return summarizer(parser.document, length)
def summarize(self, corpus, length, algorithm):
    """Summarize *corpus* and return the summary as one joined string.

    :param corpus: raw text to summarize
    :param length: number of sentences to select
    :param algorithm: one of "textrank", "lexrank", "luhn", "edmundson",
        "kl", "lsa", "sumbasic", "random"
    :return: space-joined text of the selected sentences
    :raises NotImplementedError: for an unknown *algorithm*
    """
    parser = PlaintextParser.from_string(corpus, Tokenizer(self.LANGUAGE))
    stemmer = Stemmer(self.LANGUAGE)

    # Dispatch to the requested sumy summarizer implementation.
    if algorithm == "textrank":
        summarizer = TextRankSummarizer(stemmer)
    elif algorithm == "lexrank":
        summarizer = LexRankSummarizer(stemmer)
    elif algorithm == "luhn":
        summarizer = LuhnSummarizer(stemmer)
    elif algorithm == "edmundson":
        summarizer = EdmundsonSummarizer(stemmer)
    elif algorithm == "kl":
        summarizer = KLSummarizer(stemmer)
    elif algorithm == "lsa":
        summarizer = LsaSummarizer(stemmer)
    elif algorithm == "sumbasic":
        summarizer = SumBasicSummarizer(stemmer)
    elif algorithm == "random":
        summarizer = RandomSummarizer(stemmer)
    else:
        # BUG FIX: ``raise NotImplemented(...)`` raised a TypeError because
        # NotImplemented is a constant, not an exception class.
        raise NotImplementedError("Summary algorithm is not available")

    summarizer.stop_words = get_stop_words(self.LANGUAGE)
    # NOTE: reads the private ``_text`` attribute of sumy Sentence objects,
    # preserved from the original implementation.
    return " ".join(
        obj._text for obj in summarizer(parser.document, length))
def test_cue_letters_case():
    """Bonus-word matching ignores letter case; longer sentences win."""
    document = build_document(
        ("X X X", "x x x x"),
        ("w w w", "W W W W"),
    )
    summarizer = EdmundsonSummarizer()
    summarizer.bonus_words = ("X", "w")
    summarizer.stigma_words = ("stigma",)

    selected = summarizer.cue_method(document, 2)

    assert [to_unicode(s) for s in selected] == [
        "x x x x",
        "W W W W",
    ]
def EdmundsonSummary(document, sentences):
    """Summarize *document* (plain text) into *sentences* sentences using
    sumy's EdmundsonSummarizer and return the selected sentence objects."""
    parser = PlaintextParser.from_string(document, Tokenizer("english"))
    summarizer = EdmundsonSummarizer()
    return summarizer(parser.document, sentences)
def test_key_2(self):
    """Sentences richest in bonus words are chosen by the key method."""
    document = build_document(
        ("Om nom nom nom nom", "Sure I summarize it, with bonus"),
        ("This is bonus test sentence with some extra words and bonus",),
    )
    summarizer = EdmundsonSummarizer()
    summarizer.bonus_words = ("nom", "bonus")

    sentences = summarizer.key_method(document, 2)

    expected = (
        "Om nom nom nom nom",
        "This is bonus test sentence with some extra words and bonus",
    )
    self.assertEqual(len(sentences), len(expected))
    for sentence, text in zip(sentences, expected):
        self.assertEqual(to_unicode(sentence), text)
def test_empty_document(self):
    """With all method weights zeroed, an empty document yields nothing."""
    summarizer = EdmundsonSummarizer(
        cue_weight=0, key_weight=0, title_weight=0, location_weight=0)

    selected = summarizer(build_document(), 10)

    self.assertEqual(len(selected), 0)
def test_cue_1(self):
    """A lone sentence containing cue words is selected by the cue method."""
    document = build_document(("ba bb bc bb unknown ľščťžýáíé sb sc sb",))

    summarizer = EdmundsonSummarizer()
    summarizer.bonus_words = ("ba", "bb", "bc")
    summarizer.stigma_words = ("sa", "sb", "sc")

    result = summarizer.cue_method(document, 10)

    self.assertEqual(len(result), 1)
def test_title_method_1(self):
    """Requesting one sentence returns the best heading-word match."""
    document = build_document_from_string("""
# This is cool heading
Because I am sentence I like words
And because I am string I like characters

# blank and heading
This is next paragraph because of blank line above
Here is the winner because contains words like cool and heading
""")
    summarizer = EdmundsonSummarizer()
    summarizer.null_words = ("this", "is", "I", "am", "and")

    sentences = summarizer.title_method(document, 1)

    self.assertEqual(len(sentences), 1)
    self.assertEqual(
        to_unicode(sentences[0]),
        "Here is the winner because contains words like cool and heading")
def test_key_3(self):
    """Key method ranking with the default weight and with weight=0."""
    document = build_document(
        ("wa", "wa wa", "wa wa wa", "wa wa wa wa", "wa Wa Wa Wa wa"),
        ("x X x X",),
    )
    summarizer = EdmundsonSummarizer()
    summarizer.bonus_words = ("wa", "X")

    default_ranking = summarizer.key_method(document, 3)
    self.assertEqual(len(default_ranking), 3)
    for sentence, text in zip(
            default_ranking,
            ("wa wa wa", "wa wa wa wa", "wa Wa Wa Wa wa")):
        self.assertEqual(to_unicode(sentence), text)

    zero_weight_ranking = summarizer.key_method(document, 3, weight=0)
    self.assertEqual(len(zero_weight_ranking), 3)
    for sentence, text in zip(
            zero_weight_ranking,
            ("wa wa wa wa", "wa Wa Wa Wa wa", "x X x X")):
        self.assertEqual(to_unicode(sentence), text)
def test_cue_2(self):
    """Cue method keeps document order and honours the requested count."""
    document = build_document(
        ("ba bb bc bb unknown ľščťžýáíé sb sc sb",),
        ("Pepek likes spinach",),
    )
    summarizer = EdmundsonSummarizer()
    summarizer.bonus_words = ("ba", "bb", "bc")
    summarizer.stigma_words = ("sa", "sb", "sc")

    sentences = summarizer.cue_method(document, 10)
    self.assertEqual(
        [to_unicode(s) for s in sentences],
        ["ba bb bc bb unknown ľščťžýáíé sb sc sb", "Pepek likes spinach"],
    )

    sentences = summarizer.cue_method(document, 1)
    self.assertEqual(
        [to_unicode(s) for s in sentences],
        ["ba bb bc bb unknown ľščťžýáíé sb sc sb"],
    )
def test_mixed_cue_key():
    """Combined cue+key weighting picks the bonus-word-rich sentences."""
    document = build_document_from_string("""
# This is cool heading
Because I am sentence I like words
And because I am string I like characters

# blank and heading
This is next paragraph because of blank line above
Here is the winner because contains words like cool and heading
""")
    summarizer = EdmundsonSummarizer(
        cue_weight=1, key_weight=1, title_weight=0, location_weight=0)
    summarizer.bonus_words = (
        "cool", "heading", "sentence", "words", "like", "because")
    summarizer.stigma_words = ("this", "is", "I", "am", "and")

    selected = summarizer(document, 2)

    assert [to_unicode(s) for s in selected] == [
        "Because I am sentence I like words",
        "Here is the winner because contains words like cool and heading",
    ]
def test_key_3():
    """Key method ranking with the default weight and with weight=0."""
    document = build_document(
        ("wa", "wa wa", "wa wa wa", "wa wa wa wa", "wa Wa Wa Wa wa"),
        ("x X x X",),
    )
    summarizer = EdmundsonSummarizer()
    summarizer.bonus_words = ("wa", "X")

    default_ranking = summarizer.key_method(document, 3)
    assert [to_unicode(s) for s in default_ranking] == [
        "wa wa wa",
        "wa wa wa wa",
        "wa Wa Wa Wa wa",
    ]

    zero_weight_ranking = summarizer.key_method(document, 3, weight=0)
    assert [to_unicode(s) for s in zero_weight_ranking] == [
        "wa wa wa wa",
        "wa Wa Wa Wa wa",
        "x X x X",
    ]
def test_title_method_3():
    """Requesting three sentences returns all heading-word matches in order."""
    document = build_document_from_string("""
# This is cool heading
Because I am sentence I like words
And because I am string I like characters

# blank and heading
This is next paragraph because of blank line above
Here is the winner because contains words like cool and heading
""")
    summarizer = EdmundsonSummarizer()
    summarizer.null_words = ("this", "is", "I", "am", "and")

    selected = summarizer.title_method(document, 3)

    assert [to_unicode(s) for s in selected] == [
        "Because I am sentence I like words",
        "This is next paragraph because of blank line above",
        "Here is the winner because contains words like cool and heading",
    ]
def test_cue_2():
    """Cue method keeps document order and honours the requested count."""
    document = build_document(
        ("ba bb bc bb unknown ľščťžýáíé sb sc sb",),
        ("Pepek likes spinach",),
    )
    summarizer = EdmundsonSummarizer()
    summarizer.bonus_words = ("ba", "bb", "bc")
    summarizer.stigma_words = ("sa", "sb", "sc")

    selected = summarizer.cue_method(document, 10)
    assert [to_unicode(s) for s in selected] == [
        "ba bb bc bb unknown ľščťžýáíé sb sc sb",
        "Pepek likes spinach",
    ]

    selected = summarizer.cue_method(document, 1)
    assert [to_unicode(s) for s in selected] == [
        "ba bb bc bb unknown ľščťžýáíé sb sc sb",
    ]
def test_cue_with_no_bonus_words(self):
    """Missing bonus words make the cue method raise ValueError."""
    summarizer = EdmundsonSummarizer()
    summarizer.stigma_words = ("useless", "bad", "spinach")

    with self.assertRaises(ValueError):
        summarizer.cue_method(build_document(), 10)
# NOTE(review): sys.setdefaultencoding is a Python 2-only hack (it does not
# exist on Python 3 and normally requires reload(sys) first) — confirm the
# intended interpreter before keeping it.
sys.setdefaultencoding('utf8')

"""
nltk.data.path.append('/home/kariminf/Data/NLTK/')

for sentence in summarizer(parser.document, SENTENCES_COUNT):
    print(sentence)
"""

# Load "<name>,<size>" pairs from SIZE_FILE into the ``sizes`` mapping.
# FIX: use a context manager and line iteration instead of the manual
# ``while 1`` / readline loop with an unclosed handle named ``file``
# (which shadowed the builtin).
with open(SIZE_FILE, 'r') as size_file:
    for line in size_file:
        parts = line.split(",")
        sizes[parts[0]] = int(parts[1])

nltk.data.path.append('/home/kariminf/Data/NLTK/')

# Summarize each listed document to its recorded size and write the
# baseline output file.  The loop variable was renamed from ``eval``,
# which shadowed the builtin.
for doc_name in sizes:
    txt_path = "src/body/text/en/" + doc_name
    parser = PlaintextParser.from_file(txt_path, Tokenizer(LANGUAGE))
    stemmer = Stemmer(LANGUAGE)

    summarizer = Summarizer(stemmer)
    summarizer.stop_words = get_stop_words(LANGUAGE)

    summary = extract(summarizer, sizes[doc_name])

    # Output name drops the trailing 9 characters of the source name.
    out_path = "baselines/EdmundsonSummarizer/en/" + doc_name[:-9] + ".txt"
    with open(out_path, "w") as fout:
        fout.write(summary)
def test_cue_with_no_stigma_words(self):
    """Missing stigma words make the cue method raise ValueError."""
    summarizer = EdmundsonSummarizer()
    summarizer.bonus_words = ("great", "very", "beautiful")

    with self.assertRaises(ValueError):
        summarizer.cue_method(build_document(), 10)
def test_location_method_with_empty_document(self):
    """Location method selects nothing from a document with no sentences."""
    summarizer = EdmundsonSummarizer()
    summarizer.null_words = ("na", "nb", "nc")

    result = summarizer.location_method(build_document(), 10)

    self.assertEqual(len(result), 0)
def test_title_method_with_empty_document(self):
    """Title method selects nothing from a document with no sentences."""
    summarizer = EdmundsonSummarizer()
    summarizer.null_words = ("ba", "bb", "bc")

    result = summarizer.title_method(build_document(), 10)

    self.assertEqual(len(result), 0)
def test_cue_3():
    """Cue method scores across several paragraphs of repeated cue words."""
    first_paragraph = (
        "ba " * 10,
        "bb " * 10,
        " sa" * 8 + " bb" * 10,
        "bb bc ba",
    )
    last_paragraph = (
        "babbbc " * 10,
        "na nb nc nd sa" + " bc" * 10,
        " ba n" * 10,
    )
    document = build_document(first_paragraph, (), last_paragraph)

    summarizer = EdmundsonSummarizer()
    summarizer.bonus_words = ("ba", "bb", "bc")
    summarizer.stigma_words = ("sa", "sb", "sc")

    selected = summarizer.cue_method(document, 5)

    expected = [
        ("ba " * 10).strip(),
        ("bb " * 10).strip(),
        "bb bc ba",
        "na nb nc nd sa bc bc bc bc bc bc bc bc bc bc",
        ("ba n " * 10).strip(),
    ]
    assert [to_unicode(s) for s in selected] == expected