def test_location_method_2():
    """Check ``location_method`` with ``w_p1=0`` and ``w_p2=0`` on a two-heading document.

    Four sentences are requested and the result is compared, in order,
    against the four expected sentences.
    """
    # NOTE(review): the line and paragraph breaks of this fixture were
    # reconstructed from a whitespace-collapsed source; confirm the blank-line
    # positions against the project history. Assumes
    # build_document_from_string ignores per-line leading whitespace.
    document = build_document_from_string("""
        # na nb nc ha hb
        ha = 1 + 1 + 0 = 2
        middle = 0
        ha hb = 2 + 1 + 0 = 3

        first = 1
        ha hb ha = 3
        last = 1

        # hc hd
        hb hc hd = 3 + 1 + 0 = 4
        ha hb = 2 + 1 + 0 = 3
    """)

    summarizer = EdmundsonSummarizer()
    summarizer.null_words = ("na", "nb", "nc", "nd", "ne",)

    sentences = summarizer.location_method(document, 4, w_p1=0, w_p2=0)

    assert list(map(to_unicode, sentences)) == [
        "ha hb = 2 + 1 + 0 = 3",
        "ha hb ha = 3",
        "hb hc hd = 3 + 1 + 0 = 4",
        "ha hb = 2 + 1 + 0 = 3",
    ]
def test_less_sentences_than_requested():
    """Requesting 10 sentences from a one-sentence document yields that one sentence."""
    document = build_document_from_string("""
        This is only one sentence.
    """)
    summarizer = RandomSummarizer()

    sentences = summarizer(document, 10)

    assert len(sentences) == 1
    assert to_unicode(sentences[0]) == "This is only one sentence."
def test_headings():
    """``document.headings`` exposes the single ``#``-prefixed line of the fixture."""
    # NOTE(review): line breaks reconstructed from a whitespace-collapsed
    # source; the heading must start its own line to be recognised as one.
    # The blank line before it is presumed -- confirm against project history.
    document = build_document_from_string("""
        Nějaký muž šel kolem naší zahrady
        Nějaký jiný muž šel kolem vaší zahrady

        # Nová myšlenka
        Už už abych taky šel
    """)

    assert list(map(to_unicode, document.headings)) == ["Nová myšlenka"]
def test_only_instances_of_sentence_allowed():
    """``Paragraph`` raises ``TypeError`` when given a plain string among its sentences."""
    # NOTE(review): line breaks reconstructed from a whitespace-collapsed
    # source; the blank line before the heading is presumed.
    document = build_document_from_string("""
        Nějaký muž šel kolem naší zahrady
        Nějaký jiný muž šel kolem vaší zahrady

        # Nová myšlenka
        Už už abych taky šel
    """)

    # A raw ``str`` is appended to the document's sentences to trigger the error.
    with pytest.raises(TypeError):
        Paragraph(list(document.sentences) + ["Last sentence"])
def test_sentences_in_right_order():
    """Asking for 4 sentences of a 3-sentence document returns all 3 in document order."""
    # NOTE(review): line breaks reconstructed from a whitespace-collapsed
    # source (one sentence per line, as the three assertions below imply).
    document = build_document_from_string("""
        # Heading one
        First sentence.
        Second sentence.
        Third sentence.
    """)
    summarizer = RandomSummarizer()

    sentences = summarizer(document, 4)

    assert len(sentences) == 3
    assert to_unicode(sentences[0]) == "First sentence."
    assert to_unicode(sentences[1]) == "Second sentence."
    assert to_unicode(sentences[2]) == "Third sentence."
def test_more_sentences_than_requested():
    """Requesting 4 sentences from a longer document returns exactly 4."""
    # NOTE(review): line breaks reconstructed from a whitespace-collapsed
    # source; the sentences without terminal punctuation are presumed to be
    # one per line, and the blank line before the second heading is presumed.
    document = build_document_from_string("""
        # Heading one
        First sentence.
        Second sentence.
        Third sentence.

        # Heading two
        I like sentences
        They are so wordy
        And have many many letters
        And are green in my editor
        But someone doesn't like them :(
    """)
    summarizer = RandomSummarizer()

    sentences = summarizer(document, 4)

    assert len(sentences) == 4
def test_title_method_1():
    """``title_method`` asked for one sentence picks the expected "winner" sentence."""
    # NOTE(review): line breaks reconstructed from a whitespace-collapsed
    # source. The fixture text itself mentions a "blank line above", which
    # places the paragraph break before the second heading -- confirm.
    document = build_document_from_string("""
        # This is cool heading
        Because I am sentence I like words
        And because I am string I like characters

        # blank and heading
        This is next paragraph because of blank line above
        Here is the winner because contains words like cool and heading
    """)

    summarizer = EdmundsonSummarizer()
    summarizer.null_words = ("this", "is", "I", "am", "and",)

    sentences = summarizer.title_method(document, 1)

    assert list(map(to_unicode, sentences)) == [
        "Here is the winner because contains words like cool and heading",
    ]
def test_mixed_cue_key():
    """Summarize with cue and key weights set to 1, title and location weights to 0.

    Two sentences are requested and compared, in order, against the
    expected pair.
    """
    # NOTE(review): line breaks reconstructed from a whitespace-collapsed
    # source. The fixture text itself mentions a "blank line above", which
    # places the paragraph break before the second heading -- confirm.
    document = build_document_from_string("""
        # This is cool heading
        Because I am sentence I like words
        And because I am string I like characters

        # blank and heading
        This is next paragraph because of blank line above
        Here is the winner because contains words like cool and heading
    """)

    summarizer = EdmundsonSummarizer(
        cue_weight=1,
        key_weight=1,
        title_weight=0,
        location_weight=0,
    )
    summarizer.bonus_words = ("cool", "heading", "sentence", "words", "like", "because")
    summarizer.stigma_words = ("this", "is", "I", "am", "and",)

    sentences = summarizer(document, 2)

    assert list(map(to_unicode, sentences)) == [
        "Because I am sentence I like words",
        "Here is the winner because contains words like cool and heading",
    ]