Beispiel #1
0
    def test_cue_2(self):
        # Runs cue_method on a document built from two one-sentence tuples:
        # requesting 10 sentences must return both in their original order,
        # requesting 1 must return only the sentence with bonus/stigma words.
        cue_sentence = "ba bb bc bb unknown ľščťžýáíé sb sc sb"
        plain_sentence = "Pepek likes spinach"
        document = build_document((cue_sentence,), (plain_sentence,))

        summarizer = EdmundsonSummarizer()
        summarizer.bonus_words = ("ba", "bb", "bc")
        summarizer.stigma_words = ("sa", "sb", "sc")

        # More sentences requested than the document contains.
        summary = summarizer.cue_method(document, 10)
        self.assertEqual(len(summary), 2)
        self.assertEqual(to_unicode(summary[0]), cue_sentence)
        self.assertEqual(to_unicode(summary[1]), plain_sentence)

        # Only the single best sentence requested.
        summary = summarizer.cue_method(document, 1)
        self.assertEqual(len(summary), 1)
        self.assertEqual(to_unicode(summary[0]), cue_sentence)
def test_cue_2():
    """Check ``cue_method`` on a document built from two one-sentence tuples.

    Requesting 10 sentences must return both sentences in their original
    order; requesting 1 must return only the sentence containing the
    bonus and stigma words.
    """
    cue_sentence = "ba bb bc bb unknown ľščťžýáíé sb sc sb"
    plain_sentence = "Pepek likes spinach"
    document = build_document((cue_sentence,), (plain_sentence,))

    summarizer = EdmundsonSummarizer()
    summarizer.bonus_words = ("ba", "bb", "bc")
    summarizer.stigma_words = ("sa", "sb", "sc")

    full_summary = summarizer.cue_method(document, 10)
    assert [to_unicode(s) for s in full_summary] == [cue_sentence, plain_sentence]

    single_summary = summarizer.cue_method(document, 1)
    assert [to_unicode(s) for s in single_summary] == [cue_sentence]
def test_cue_with_no_bonus_words():
    """Expect ``ValueError`` from ``cue_method`` when only stigma words are set."""
    stigma_only = EdmundsonSummarizer()
    stigma_only.stigma_words = ("useless", "bad", "spinach")

    with pytest.raises(ValueError):
        stigma_only.cue_method(build_document(), 10)
def test_cue_with_no_stigma_words():
    """Expect ``ValueError`` from ``cue_method`` when only bonus words are set."""
    bonus_only = EdmundsonSummarizer()
    bonus_only.bonus_words = ("great", "very", "beautiful")

    with pytest.raises(ValueError):
        bonus_only.cue_method(build_document(), 10)
Beispiel #5
0
    def test_cue_3(self):
        # Runs cue_method on a document built from three sentence tuples (the
        # middle one empty), asks for five sentences and checks their exact
        # text and order.
        leading_sentences = (
            "ba " * 10,
            "bb " * 10,
            " sa" * 8 + " bb" * 10,
            "bb bc ba",
        )
        trailing_sentences = (
            "babbbc " * 10,
            "na nb nc nd sa" + " bc" * 10,
            " ba n" * 10,
        )
        document = build_document(leading_sentences, (), trailing_sentences)

        summarizer = EdmundsonSummarizer()
        summarizer.bonus_words = ("ba", "bb", "bc")
        summarizer.stigma_words = ("sa", "sb", "sc")

        expected_texts = (
            ("ba " * 10).strip(),
            ("bb " * 10).strip(),
            "bb bc ba",
            "na nb nc nd sa bc bc bc bc bc bc bc bc bc bc",
            ("ba n " * 10).strip(),
        )

        summary = summarizer.cue_method(document, 5)
        self.assertEqual(len(summary), 5)
        # Compare position by position, as the length is already verified.
        for position, expected_text in enumerate(expected_texts):
            self.assertEqual(to_unicode(summary[position]), expected_text)
Beispiel #6
0
    def test_cue_empty(self):
        # cue_method on a document built without any sentences must return
        # an empty result.
        bonus = ("ba", "bb", "bc")
        stigma = ("sa", "sb", "sc")

        summarizer = EdmundsonSummarizer()
        summarizer.bonus_words = bonus
        summarizer.stigma_words = stigma

        summary = summarizer.cue_method(build_document(), 10)
        self.assertEqual(len(summary), 0)
def test_cue_3():
    """Check the five sentences ``cue_method`` picks from a larger document.

    The document is built from three sentence tuples (the middle one
    empty); the test pins both the text and the order of the result.
    """
    leading_sentences = (
        "ba " * 10,
        "bb " * 10,
        " sa" * 8 + " bb" * 10,
        "bb bc ba",
    )
    trailing_sentences = (
        "babbbc " * 10,
        "na nb nc nd sa" + " bc" * 10,
        " ba n" * 10,
    )
    document = build_document(leading_sentences, (), trailing_sentences)

    summarizer = EdmundsonSummarizer()
    summarizer.bonus_words = ("ba", "bb", "bc")
    summarizer.stigma_words = ("sa", "sb", "sc")

    summary = summarizer.cue_method(document, 5)

    expected_texts = [
        ("ba " * 10).strip(),
        ("bb " * 10).strip(),
        "bb bc ba",
        "na nb nc nd sa bc bc bc bc bc bc bc bc bc bc",
        ("ba n " * 10).strip(),
    ]
    assert [to_unicode(sentence) for sentence in summary] == expected_texts
Beispiel #8
0
    def test_cue_3(self):
        # Five sentences are requested from a document built from three
        # sentence tuples (the second is empty); their text and order are
        # verified one by one.
        first_group = (
            "ba "*10,
            "bb "*10,
            " sa"*8 + " bb"*10,
            "bb bc ba",
        )
        third_group = (
            "babbbc "*10,
            "na nb nc nd sa" + " bc"*10,
            " ba n"*10,
        )
        document = build_document(first_group, (), third_group)

        bonus = ("ba", "bb", "bc")
        stigma = ("sa", "sb", "sc")
        summarizer = EdmundsonSummarizer()
        summarizer.bonus_words = bonus
        summarizer.stigma_words = stigma

        wanted = (
            ("ba "*10).strip(),
            ("bb "*10).strip(),
            "bb bc ba",
            "na nb nc nd sa bc bc bc bc bc bc bc bc bc bc",
            ("ba n "*10).strip(),
        )

        picked = summarizer.cue_method(document, 5)
        self.assertEqual(len(picked), 5)
        for index, text in enumerate(wanted):
            self.assertEqual(to_unicode(picked[index]), text)
Beispiel #9
0
def test_cue_empty():
    """``cue_method`` on a document built without sentences yields nothing."""
    bonus = ("ba", "bb", "bc")
    stigma = ("sa", "sb", "sc")

    summarizer = EdmundsonSummarizer()
    summarizer.bonus_words = bonus
    summarizer.stigma_words = stigma

    summary = summarizer.cue_method(build_document(), 10)

    assert [to_unicode(sentence) for sentence in summary] == []
Beispiel #10
0
    def test_cue_2(self):
        # Two one-sentence tuples make up the document. With a limit of 10
        # both sentences come back in order; with a limit of 1 only the
        # sentence holding the bonus/stigma words is returned.
        marked = "ba bb bc bb unknown ľščťžýáíé sb sc sb"
        unmarked = "Pepek likes spinach"
        document = build_document((marked,), (unmarked,))

        bonus = ("ba", "bb", "bc")
        stigma = ("sa", "sb", "sc")
        summarizer = EdmundsonSummarizer()
        summarizer.bonus_words = bonus
        summarizer.stigma_words = stigma

        picked = summarizer.cue_method(document, 10)
        self.assertEqual(len(picked), 2)
        self.assertEqual(to_unicode(picked[0]), marked)
        self.assertEqual(to_unicode(picked[1]), unmarked)

        picked = summarizer.cue_method(document, 1)
        self.assertEqual(len(picked), 1)
        self.assertEqual(to_unicode(picked[0]), marked)
Beispiel #11
0
    def test_cue_1(self):
        # A document with a single sentence must yield exactly one sentence,
        # even though ten are requested.
        only_sentence = "ba bb bc bb unknown ľščťžýáíé sb sc sb"
        document = build_document((only_sentence,))

        bonus = ("ba", "bb", "bc")
        stigma = ("sa", "sb", "sc")
        summarizer = EdmundsonSummarizer()
        summarizer.bonus_words = bonus
        summarizer.stigma_words = stigma

        picked = summarizer.cue_method(document, 10)
        self.assertEqual(len(picked), 1)
Beispiel #12
0
def test_cue_2():
    """Verify ``cue_method`` results for limits of 10 and 1 on two sentences.

    The document is built from two one-sentence tuples. A limit of 10
    returns both sentences in order; a limit of 1 returns only the
    sentence that contains the bonus and stigma words.
    """
    marked = "ba bb bc bb unknown ľščťžýáíé sb sc sb"
    unmarked = "Pepek likes spinach"
    document = build_document((marked,), (unmarked,))

    bonus = ("ba", "bb", "bc")
    stigma = ("sa", "sb", "sc")
    summarizer = EdmundsonSummarizer()
    summarizer.bonus_words = bonus
    summarizer.stigma_words = stigma

    picked = summarizer.cue_method(document, 10)
    assert [to_unicode(item) for item in picked] == [marked, unmarked]

    picked = summarizer.cue_method(document, 1)
    assert [to_unicode(item) for item in picked] == [marked]
Beispiel #13
0
    def test_cue_letters_case(self):
        # Bonus words are given as "X" and "w", while the sentences expected
        # in the result ("x x x x", "W W W W") use the opposite letter case.
        upper_then_lower = ("X X X", "x x x x")
        lower_then_upper = ("w w w", "W W W W")
        document = build_document(upper_then_lower, lower_then_upper)

        bonus = ("X", "w")
        stigma = ("stigma",)
        summarizer = EdmundsonSummarizer()
        summarizer.bonus_words = bonus
        summarizer.stigma_words = stigma

        picked = summarizer.cue_method(document, 2)
        self.assertEqual(len(picked), 2)
        self.assertEqual(to_unicode(picked[0]), "x x x x")
        self.assertEqual(to_unicode(picked[1]), "W W W W")
Beispiel #14
0
    def test_cue_empty(self):
        # With bonus and stigma words configured, a document built without
        # sentences must produce an empty cue_method result.
        summarizer = EdmundsonSummarizer()
        summarizer.bonus_words = ("ba", "bb", "bc")
        summarizer.stigma_words = ("sa", "sb", "sc")

        summary = summarizer.cue_method(build_document(), 10)
        self.assertEqual(len(summary), 0)
def test_cue_empty():
    """An empty document must give an empty ``cue_method`` result."""
    summarizer = EdmundsonSummarizer()
    summarizer.bonus_words = ("ba", "bb", "bc")
    summarizer.stigma_words = ("sa", "sb", "sc")

    summary = summarizer.cue_method(build_document(), 10)

    assert [to_unicode(sentence) for sentence in summary] == []
Beispiel #16
0
def test_cue_letters_case():
    """Check ``cue_method`` when bonus words and sentences differ in case.

    Bonus words are ``"X"`` and ``"w"``; the two sentences expected in the
    result are ``"x x x x"`` and ``"W W W W"``.
    """
    upper_then_lower = ("X X X", "x x x x")
    lower_then_upper = ("w w w", "W W W W")
    document = build_document(upper_then_lower, lower_then_upper)

    bonus = ("X", "w")
    stigma = ("stigma",)
    summarizer = EdmundsonSummarizer()
    summarizer.bonus_words = bonus
    summarizer.stigma_words = stigma

    picked = summarizer.cue_method(document, 2)

    assert [to_unicode(item) for item in picked] == ["x x x x", "W W W W"]
Beispiel #17
0
    def test_cue_1(self):
        # Ten sentences are requested from a one-sentence document; exactly
        # one must be returned.
        only_sentence = "ba bb bc bb unknown ľščťžýáíé sb sc sb"
        document = build_document((only_sentence,))

        summarizer = EdmundsonSummarizer()
        summarizer.bonus_words = ("ba", "bb", "bc")
        summarizer.stigma_words = ("sa", "sb", "sc")

        summary = summarizer.cue_method(document, 10)
        self.assertEqual(len(summary), 1)
Beispiel #18
0
    def test_cue_letters_case(self):
        # The bonus words ("X", "w") differ in letter case from the two
        # sentences the test expects back ("x x x x" and "W W W W").
        document = build_document(("X X X", "x x x x"), ("w w w", "W W W W"))

        summarizer = EdmundsonSummarizer()
        summarizer.bonus_words = ("X", "w")
        summarizer.stigma_words = ("stigma",)

        expected_texts = ("x x x x", "W W W W")

        summary = summarizer.cue_method(document, 2)
        self.assertEqual(len(summary), 2)
        for position, expected_text in enumerate(expected_texts):
            self.assertEqual(to_unicode(summary[position]), expected_text)
def test_cue_letters_case():
    """Check ``cue_method`` with bonus words whose case differs from the text.

    With bonus words ``"X"`` and ``"w"``, the expected result consists of
    the sentences ``"x x x x"`` and ``"W W W W"``, in that order.
    """
    document = build_document(("X X X", "x x x x"), ("w w w", "W W W W"))

    summarizer = EdmundsonSummarizer()
    summarizer.bonus_words = ("X", "w")
    summarizer.stigma_words = ("stigma",)

    summary = summarizer.cue_method(document, 2)

    expected_texts = ["x x x x", "W W W W"]
    assert [to_unicode(sentence) for sentence in summary] == expected_texts
Beispiel #20
0
def test_cue_with_no_bonus_words():
    """``cue_method`` must raise ``ValueError`` if no bonus words were set."""
    without_bonus = EdmundsonSummarizer()
    stigma = ("useless", "bad", "spinach")
    without_bonus.stigma_words = stigma

    with pytest.raises(ValueError):
        without_bonus.cue_method(build_document(), 10)
def test_cue_with_no_words():
    """Expect ``ValueError`` when neither bonus nor stigma words are assigned."""
    unconfigured = EdmundsonSummarizer()

    with pytest.raises(ValueError):
        unconfigured.cue_method(build_document(), 10)
Beispiel #22
0
def test_cue_with_no_stigma_words():
    """``cue_method`` must raise ``ValueError`` if no stigma words were set."""
    without_stigma = EdmundsonSummarizer()
    bonus = ("great", "very", "beautiful")
    without_stigma.bonus_words = bonus

    with pytest.raises(ValueError):
        without_stigma.cue_method(build_document(), 10)
Beispiel #23
0
def test_cue_with_no_words():
    """A summarizer with no word lists assigned must raise ``ValueError``."""
    bare_summarizer = EdmundsonSummarizer()

    with pytest.raises(ValueError):
        bare_summarizer.cue_method(build_document(), 10)
Beispiel #24
-1
def test_cue_3():
    """Pin the text and order of five sentences chosen by ``cue_method``.

    The document is built from three sentence tuples, the second of which
    is empty.
    """
    first_group = (
        "ba "*10,
        "bb "*10,
        " sa"*8 + " bb"*10,
        "bb bc ba",
    )
    third_group = (
        "babbbc "*10,
        "na nb nc nd sa" + " bc"*10,
        " ba n"*10,
    )
    document = build_document(first_group, (), third_group)

    bonus = ("ba", "bb", "bc")
    stigma = ("sa", "sb", "sc")
    summarizer = EdmundsonSummarizer()
    summarizer.bonus_words = bonus
    summarizer.stigma_words = stigma

    picked = summarizer.cue_method(document, 5)

    wanted = [
        ("ba "*10).strip(),
        ("bb "*10).strip(),
        "bb bc ba",
        "na nb nc nd sa bc bc bc bc bc bc bc bc bc bc",
        ("ba n "*10).strip(),
    ]
    assert [to_unicode(item) for item in picked] == wanted