Ejemplo n.º 1
0
def test_location_method_2():
    """Check ``location_method`` when both ``w_p1`` and ``w_p2`` are zero.

    Four sentences are requested from a document containing two
    '#'-prefixed lines; the expected sentences appear in the same order
    as they do in the source text.
    """
    parsed = build_document_from_string("""
        # na nb nc ha hb
        ha = 1 + 1 + 0 = 2
        middle = 0
        ha hb = 2 + 1 + 0 = 3

        first = 1
        ha hb ha = 3
        last = 1

        # hc hd
        hb hc hd = 3 + 1 + 0 = 4
        ha hb = 2 + 1 + 0 = 3
    """)

    edmundson = EdmundsonSummarizer()
    edmundson.null_words = ("na", "nb", "nc", "nd", "ne",)

    selected = edmundson.location_method(parsed, 4, w_p1=0, w_p2=0)

    expected_texts = [
        "ha hb = 2 + 1 + 0 = 3",
        "ha hb ha = 3",
        "hb hc hd = 3 + 1 + 0 = 4",
        "ha hb = 2 + 1 + 0 = 3",
    ]
    assert [to_unicode(sentence) for sentence in selected] == expected_texts
Ejemplo n.º 2
0
def test_less_sentences_than_requested():
    """Asking for 10 sentences from a one-sentence document returns just one."""
    single_sentence_doc = build_document_from_string("""
        This is only one sentence.
    """)

    picked = RandomSummarizer()(single_sentence_doc, 10)

    assert len(picked) == 1
    assert to_unicode(picked[0]) == "This is only one sentence."
def test_headings():
    """The single '#'-prefixed line is exposed via ``document.headings``."""
    parsed = build_document_from_string("""
        Nějaký muž šel kolem naší zahrady
        Nějaký jiný muž šel kolem vaší zahrady

        # Nová myšlenka
        Už už abych taky šel
    """)

    heading_texts = [to_unicode(heading) for heading in parsed.headings]

    assert heading_texts == ["Nová myšlenka"]
def test_only_instances_of_sentence_allowed():
    """``Paragraph`` raises ``TypeError`` when a plain string is mixed in."""
    parsed = build_document_from_string("""
        Nějaký muž šel kolem naší zahrady
        Nějaký jiný muž šel kolem vaší zahrady

        # Nová myšlenka
        Už už abych taky šel
    """)

    with pytest.raises(TypeError):
        # The document's own sentences followed by one bare string.
        mixed_items = list(parsed.sentences)
        mixed_items.append("Last sentence")
        Paragraph(mixed_items)
Ejemplo n.º 5
0
def test_sentences_in_right_order():
    """All three sentences come back in source order when four are requested."""
    parsed = build_document_from_string("""
        # Heading one
        First sentence.
        Second sentence.
        Third sentence.
    """)
    random_summarizer = RandomSummarizer()

    picked = random_summarizer(parsed, 4)

    expected_texts = ("First sentence.", "Second sentence.", "Third sentence.")
    assert len(picked) == 3
    for position, expected in enumerate(expected_texts):
        assert to_unicode(picked[position]) == expected
Ejemplo n.º 6
0
def test_more_sentences_than_requested():
    """Only the four requested sentences are returned from a longer document."""
    source_text = """
        # Heading one
        First sentence.
        Second sentence.
        Third sentence.

        # Heading two
        I like sentences
        They are so wordy
        And have many many letters
        And are green in my editor
        But someone doesn't like them :(
    """
    parsed = build_document_from_string(source_text)
    random_summarizer = RandomSummarizer()

    picked = random_summarizer(parsed, 4)

    assert len(picked) == 4
Ejemplo n.º 7
0
def test_title_method_1():
    """Check the single sentence returned by ``title_method``.

    With the null words configured below, requesting one sentence is
    expected to yield the last sentence of the document.
    """
    parsed = build_document_from_string("""
        # This is cool heading
        Because I am sentence I like words
        And because I am string I like characters

        # blank and heading
        This is next paragraph because of blank line above
        Here is the winner because contains words like cool and heading
    """)

    edmundson = EdmundsonSummarizer()
    edmundson.null_words = ("this", "is", "I", "am", "and",)

    selected = edmundson.title_method(parsed, 1)

    winner = "Here is the winner because contains words like cool and heading"
    assert [to_unicode(sentence) for sentence in selected] == [winner]
Ejemplo n.º 8
0
def test_mixed_cue_key():
    """Check the summary when only the cue and key weights are non-zero.

    The title and location weights are set to zero; two sentences are
    requested and compared against the expected pair.
    """
    parsed = build_document_from_string("""
        # This is cool heading
        Because I am sentence I like words
        And because I am string I like characters

        # blank and heading
        This is next paragraph because of blank line above
        Here is the winner because contains words like cool and heading
    """)
    weights = {
        "cue_weight": 1,
        "key_weight": 1,
        "title_weight": 0,
        "location_weight": 0,
    }
    edmundson = EdmundsonSummarizer(**weights)
    edmundson.bonus_words = ("cool", "heading", "sentence", "words", "like", "because")
    edmundson.stigma_words = ("this", "is", "I", "am", "and",)

    selected = edmundson(parsed, 2)

    expected_texts = [
        "Because I am sentence I like words",
        "Here is the winner because contains words like cool and heading",
    ]
    assert [to_unicode(sentence) for sentence in selected] == expected_texts