Example #1
0
def klsum(parser, sentence_count):
    """Summarize ``parser.document`` with the KL-Sum algorithm.

    Args:
        parser: a sumy parser exposing a ``document`` attribute.
        sentence_count: number of sentences to extract.

    Returns:
        str: the selected sentences concatenated with no separator
        (matching the original behaviour).

    Note: resolves a name ``language`` from an enclosing/module scope
    for the stemmer and stop words.
    """
    summarizer = KLSummarizer(Stemmer(language))
    summarizer.stop_words = get_stop_words(language)
    summary = summarizer(parser.document, sentence_count)
    # str.join replaces the original quadratic `temp = temp + ...` loop.
    return "".join(str(sentence) for sentence in summary)
Example #2
0
def kl_summarizer(text, stemmer, language, sentences_count):
    """Return a KL-Sum summary of *text*, one sentence per line."""
    document = PlaintextParser.from_string(text, Tokenizer(language)).document
    summarizer = KLSummarizer(stemmer)
    summarizer.stop_words = get_stop_words(language)
    selected = [str(sent) for sent in summarizer(document, sentences_count)]
    return "\n".join(selected)
Example #3
0
def __init__():
    """Run six sumy summarizers over the module-level ``text`` and
    collect every extracted sentence.

    Returns:
        list: sentences from LSA, Luhn, LexRank, TextRank, SumBasic and
        KL-Sum, in that order.

    Note: despite the name this is a plain function (no ``self``); it
    reads a module-level ``text`` variable.
    """
    LANGUAGE = "english"
    SENTENCES_COUNT = 1

    stemmer = Stemmer(LANGUAGE)

    # One instance per algorithm; all share the same stemmer and stop words.
    summarizers = [
        Lsa(stemmer),
        Luhn(stemmer),
        LexRank(stemmer),
        TxtRank(stemmer),
        SumBasic(stemmer),
        KL(stemmer),
    ]
    for summarizer in summarizers:
        summarizer.stop_words = get_stop_words(LANGUAGE)

    parser = HtmlParser.from_string(text, 0, Tokenizer(LANGUAGE))

    allvariations = []
    for summarizer in summarizers:
        for sentence in summarizer(parser.document, SENTENCES_COUNT):
            print(str(sentence))
            allvariations.append(sentence)
    # BUG FIX: the original `return` sat inside the last for-loop, so the
    # function returned after the first KL sentence (or None if KL
    # produced no sentences). Return once everything is collected.
    return allvariations
Example #4
0
def test_tf_idf_metric_should_be_real_number():
    """https://github.com/miso-belica/sumy/issues/41"""
    sentence = Sentence("There are five words, jop.", Tokenizer("english"))
    frequencies = KLSummarizer().compute_tf([sentence])
    # Five distinct words -> each accounts for exactly 1/5 of the text.
    assert frequencies == {w: 0.2 for w in ("there", "are", "five", "words", "jop")}
Example #5
0
def run_sumy(text, algo='KL', sent_count=3):
    """Summarize *text* with sumy.

    Args:
        text: plain-text document.
        algo: 'KL' or 'LexRank'.
        sent_count: number of sentences to return.

    Returns:
        The sentences selected by the chosen summarizer.

    Raises:
        ValueError: if *algo* is not a supported algorithm name.
    """
    parser = PlaintextParser.from_string(text, Tokenizer("english"))
    stemmer = Stemmer("english")

    if algo == 'KL':
        summarizer = KLSummarizer(stemmer)
    elif algo == 'LexRank':
        summarizer = LexRankSummarizer(stemmer)
    else:
        # BUG FIX: previously fell through with `summarizer` unbound,
        # producing an opaque NameError on the next line.
        raise ValueError(f"Unsupported algorithm: {algo!r}")
    summarizer.stop_words = get_stop_words("english")

    summary_list = summarizer(parser.document, sent_count)
    return summary_list
Example #6
0
def test_tf_idf_metric_should_be_real_number():
    """https://github.com/miso-belica/sumy/issues/41"""
    summarizer = KLSummarizer()
    tokens = Tokenizer("english")
    frequencies = summarizer.compute_tf([Sentence("There are five words, jop.", tokens)])
    # Each of the five distinct words contributes 1/5 of the frequency mass.
    expected = dict.fromkeys(("there", "are", "five", "words", "jop"), 0.2)
    assert frequencies == expected
Example #7
0
def klReferenceSummary(path):
    """Extract a KL-Sum reference summary from the file at *path*."""
    parser = PlaintextParser.from_file(path, Tokenizer(LANGUAGE))
    summarizer = KLSummarizer(Stemmer(LANGUAGE))
    summarizer.stop_words = get_stop_words(LANGUAGE)
    summary = summarizer(parser.document, SENTENCES_COUNT)
    # sumy keeps the raw sentence text on the private ``_text`` attribute.
    return [sentence._text for sentence in summary]
Example #8
0
def klReferenceSummary(path):
	"""Return the KL-Sum summary sentences of the file at *path*."""
	doc = PlaintextParser.from_file(path, Tokenizer(LANGUAGE)).document
	summarizer = KLSummarizer(Stemmer(LANGUAGE))
	summarizer.stop_words = get_stop_words(LANGUAGE)
	# Collect the raw text (private ``_text``) of every selected sentence.
	return [s._text for s in summarizer(doc, SENTENCES_COUNT)]
Example #9
0
def KL(rsc_file, dst_file, count):
    """Summarize the Chinese text in *rsc_file* with KL-Sum and write
    the selected sentences, one per line, to *dst_file* (also printed).
    """
    language = "chinese"
    parser = PlaintextParser.from_file(rsc_file,
                                       Tokenizer(language),
                                       encoding='utf-8')
    stemmer = Stemmer(language)  # language-specific stemmer

    summarizer = KLSummarizer(stemmer)  # KL-Sum algorithm (original comment said "LSA", which was wrong)
    summarizer.stop_words = get_stop_words(language)
    with open(dst_file, 'w', encoding='utf-8') as f:
        for sentence in summarizer(parser.document, count):
            f.write(str(sentence))
            f.write('\n')
            print(sentence)
Example #10
0
def createSummary(text, language="english", num_sentences=3, method="lexrank"):
    """Extractively summarize *text*.

    Args:
        text: plain-text document.
        language: tokenizer/stemmer/stop-word language.
        num_sentences: number of sentences to extract.
        method: one of "lexrank", "lsa", "luhn", "kl".

    Returns:
        list[str]: the selected sentences.

    Raises:
        Exception: if *method* is not a recognized algorithm name.
    """
    # Language tokenizer and plain-text parser.
    parser = PlaintextParser.from_string(text, Tokenizer(language))
    # Word stemming.
    stemmer = Stemmer(language)

    summarizer_classes = {
        "lexrank": LexRankSummarizer,
        "lsa": LSASummarizer,
        "luhn": LuhnSummarizer,
        "kl": KLSummarizer,
    }
    try:
        summarizer = summarizer_classes[method](stemmer)
    except KeyError:
        # BUG FIX: the original used JS-style "${method}" inside an
        # f-string, which printed a literal "$" in the error message.
        raise Exception(f"Unknown summarization method: {method}")

    summarizer.stop_words = get_stop_words(language)

    return [str(sentence) for sentence in summarizer(parser.document, num_sentences)]
Example #11
0
    def summarize(self, corpus, length, algorithm):
        """Summarize *corpus* with the named algorithm.

        Args:
            corpus: plain-text input document.
            length: number of sentences to extract.
            algorithm: one of textrank, lexrank, luhn, edmundson, kl,
                lsa, sumbasic, random.

        Returns:
            str: selected sentences joined with single spaces.

        Raises:
            NotImplementedError: if *algorithm* is not recognized.
        """
        parser = PlaintextParser.from_string(corpus, Tokenizer(self.LANGUAGE))

        summarizer_classes = {
            "textrank": TextRankSummarizer,
            "lexrank": LexRankSummarizer,
            "luhn": LuhnSummarizer,
            "edmundson": EdmundsonSummarizer,
            "kl": KLSummarizer,
            "lsa": LsaSummarizer,
            "sumbasic": SumBasicSummarizer,
            "random": RandomSummarizer,
        }
        try:
            summarizer = summarizer_classes[algorithm](Stemmer(self.LANGUAGE))
        except KeyError:
            # BUG FIX: `raise NotImplemented(...)` is a TypeError at runtime
            # (NotImplemented is a constant, not an exception class).
            raise NotImplementedError("Summary algorithm is not available")

        summarizer.stop_words = get_stop_words(self.LANGUAGE)
        summary = " ".join(
            obj._text for obj in summarizer(parser.document, length))

        return summary
Example #12
0
def find_relevant_quote(book_id, chapter, num_sentences=1, technique='luhn'):
    """
    Create an extractive summary for a chapter of the book.

    Parameters:
    book_id: (str) the book identifier
    chapter: is the chapter number to summarize
    num_sentences: how many sentences to extract
    technique: summarization algorithm name; unknown names fall back to Luhn

    Returns:
    sentences: the extracted sentences
    """
    chapter_filename = get_data_filename(book_id, 'book_chapters', chapter)
    parser = PlaintextParser.from_file(chapter_filename, Tokenizer("english"))
    # Dispatch table replaces the original if/elif ladder; Luhn is the default.
    summarizer_by_technique = {
        'lsa': LsaSummarizer,
        'lexrank': LexRankSummarizer,
        'textrank': TextRankSummarizer,
        'kl': KLSummarizer,
        'random': RandomSummarizer,
        'reduction': ReductionSummarizer,
        'sumbasic': SumBasicSummarizer,
    }
    summarizer = summarizer_by_technique.get(technique, LuhnSummarizer)()
    return summarizer(parser.document, num_sentences)
def KLSummary(document, sentences):
    """Summarize *document* into *sentences* sentences via KL-Sum."""
    parsed = PlaintextParser.from_string(document, Tokenizer("english"))
    return KLSummarizer()(parsed.document, sentences)
Example #14
0
    def summarize_with_info(self, corpus, length, algorithm):
        """Summarize *corpus* and return the summarizer's sentence objects.

        Args:
            corpus: plain-text input document.
            length: number of sentences to extract.
            algorithm: one of textrank, lexrank, luhn, edmundson, kl,
                lsa, sumbasic, random.

        Returns:
            The sentence objects selected by the chosen summarizer.

        Raises:
            NotImplementedError: if *algorithm* is not recognized.
        """
        parser = PlaintextParser.from_string(corpus, Tokenizer(self.LANGUAGE))
        stemmer = Stemmer(self.LANGUAGE)

        if algorithm == "textrank":
            summarizer = TextRankSummarizer(stemmer)
        elif algorithm == "lexrank":
            summarizer = LexRankSummarizer(stemmer)
        elif algorithm == "luhn":
            summarizer = LuhnSummarizer(stemmer)
        elif algorithm == "edmundson":
            summarizer = EdmundsonSummarizer(stemmer)
            # Edmundson additionally needs per-document bonus/stigma words.
            summarizer.bonus_words = parser.significant_words
            summarizer.stigma_words = parser.stigma_words
        elif algorithm == "kl":
            summarizer = KLSummarizer(stemmer)
        elif algorithm == "lsa":
            summarizer = LsaSummarizer(stemmer)
        elif algorithm == "sumbasic":
            summarizer = SumBasicSummarizer(stemmer)
        elif algorithm == "random":
            summarizer = RandomSummarizer(stemmer)
        else:
            # BUG FIX: `raise NotImplemented(...)` is a TypeError at runtime;
            # NotImplementedError is the intended exception class.
            raise NotImplementedError("Summary algorithm is not available")

        summarizer.stop_words = get_stop_words(self.LANGUAGE)

        return summarizer(parser.document, length)
Example #15
0
def klsum_summarize(original_text, s):
    """KL-Sum summary of *original_text*: each chosen sentence rendered
    as ``str(sentence) + '.'`` on its own line. Also bumps the global
    progress counter and prints it every 100 calls."""
    global cnt

    cnt += 1
    if cnt % 100 == 0:
        print(cnt)

    from sumy.summarizers.kl import KLSummarizer
    from sumy.nlp.tokenizers import Tokenizer
    from sumy.parsers.plaintext import PlaintextParser

    document = PlaintextParser.from_string(original_text, Tokenizer('english')).document
    kl_summary = KLSummarizer()(document, sentences_count=s)

    return "".join(str(sentence) + '.' + "\n" for sentence in kl_summary)
def summarize_terms_text(txt):
    """Summarize terms-of-service text and return an HTML report (JSON-encoded).

    Pipeline: normalise *txt* to ASCII, keep only the sentences whose
    cleaned form matches the module-level ``clause`` regex, KL-summarise
    them, bucket each summary sentence by whether "you" or "we" appears
    first near the start, and render the buckets as an HTML list.
    """
    text_data = unidecode.unidecode(txt)
    clean_list, pure_list = prepare_for_regex(text_data)

    # Keep the original ("pure") sentence whenever its cleaned twin
    # matches the clause-detection pattern.
    data_to_summarize = []
    for clean, pure in zip(clean_list, pure_list):
        if re.findall(clause, clean):
            data_to_summarize.append(pure)

    text_data = " ".join(data_to_summarize)
    parser = PlaintextParser(text_data, Tokenizer(LANGUAGE))
    stemmer = Stemmer(LANGUAGE)
    summarizer = KLSummarizer(stemmer)

    summary = summarizer(parser.document, SENTENCES_COUNT)

    # Placeholder when no clause sentences survived the filter.
    if len(summary) == 0:
        summary = ["No Terms"]

    sentences = [str(x) for x in summary]
    message = HTML_OPEN + "<ul class='rolldown-list' id='myList'>"

    # Bucket each sentence by which party it appears to bind.
    you_agree = []
    they_agree = []
    other_clause = []
    for sentence in sentences:
        # TODO: logging in the future
        lower = sentence.lower()
        you_idx = lower.find("you")
        they_idx = lower.find("we")
        # Neither keyword within the first ~15 characters -> "other".
        if (you_idx == -1 or you_idx > 15) and (they_idx == -1
                                                or they_idx > 15):
            other_clause.append(sentence)
        elif you_idx == -1:
            they_agree.append(sentence)
        elif they_idx == -1:
            you_agree.append(sentence)
        elif you_idx < they_idx:
            # Both present: whichever keyword comes first wins.
            you_agree.append(sentence)
        else:
            they_agree.append(sentence)

    # Emit one <li> list per non-empty bucket, each under its header.
    if len(you_agree) > 0:
        message += YOU_AGREE_HEADER + "<li>"
        message += "</li><li>".join(you_agree)
        message += "</li>"

    if len(they_agree) > 0:
        message += THEY_AGREE_HEADER + "<li>"
        message += "</li><li>".join(they_agree)
        message += "</li>"

    if len(other_clause) > 0:
        message += OTHER_HEADER + "<li>"
        message += "</li><li>".join(other_clause)
        message += "</li>"

    message += "</ul></div>"

    return json.dumps(message)
Example #17
0
def summarize_url(url, summarizer):
    """Fetch *url*, summarize it with the named algorithm, and return
    the selected sentences as strings.

    summarizer: one of 'luhn', 'edmundson', 'lsa', 'lex', 'text', 'sb';
    anything else falls back to KL-Sum. (Python 2 code.)
    """
    # E.G. url = "http://www.cnn.com/2016/06/12/politics/hillary-clinton-bernie-sanders-meeting-tuesday/index.html"
    print 'Summarizing ', url
    parser = HtmlParser.from_url(url, Tokenizer(LANGUAGE))
    # or for plain text files
    # parser = PlaintextParser.from_file("document.txt", Tokenizer(LANGUAGE))
    stemmer = Stemmer(LANGUAGE)

    # Rebind the name `summarizer` from algorithm name to instance.
    if summarizer == 'luhn':
        summarizer = LuhnSummarizer(stemmer)
    elif summarizer == 'edmundson':
        summarizer = ESummarizer(stemmer)
    elif summarizer == 'lsa':
        summarizer = LsaSummarizer(stemmer)
    elif summarizer == 'lex':
        summarizer = LexSummarizer(stemmer)
    elif summarizer == 'text':
        summarizer = TextSummarizer(stemmer)
    elif summarizer == 'sb':
        summarizer = SumBasicSummarizer(stemmer)
    else:
        summarizer = KLSummarizer(stemmer)

    summarizer.stop_words = get_stop_words(LANGUAGE)
    print summarizer

    sentences = []
    for sentence in summarizer(parser.document, SENTENCES_COUNT):
        print sentence
        sentences.append(str(sentence))

    return sentences
Example #18
0
def KLSummary(document, sentences):
    """Return the KL-Sum summary of *document* as a list of strings."""
    parser = PlaintextParser.from_string(document, Tokenizer("english"))
    summary = KLSummarizer()(parser.document, sentences)
    return [str(sentence) for sentence in summary]
 def __init__(self):
     """
     A pseudo-summariser used as an upper bound rather than a real one:
     it selects the sentences that best match the gold summaries, so its
     ROUGE score marks the ceiling any summariser could reach.
     """
     # KL-Sum performs the actual sentence extraction.
     self.summariser = KLSummarizer()
     # Number of sentences each summary contains.
     self.summary_length = 10
Example #20
0
 def apply_text_model(self, text: str) -> list:
     """Summarize *text* with KL-Sum and return the indices of the
     selected sentences within the text's original sentence list."""
     parser = PlaintextParser.from_string(text, Tokenizer("english"))
     init_sents = text2sentences(parser._text)
     summarizer = KLSummarizer()
     summary = summarizer(parser.document, self.summary_n_sent)
     summary_ind = []
     for summary_sent in summary:
         # NOTE(review): .index raises ValueError if sumy's rendering of a
         # sentence differs from text2sentences' output — assumed to match.
         summary_ind.append(init_sents.index(str(summary_sent)))
     return summary_ind
def generate_benchmark_summary(filename, num_summary):
    """Run five sumy baselines over data/text_summary/<filename>.txt,
    printing and collecting each one's num_summary-sentence summary.

    Returns a tuple of five lists of sentence strings:
    (SumBasic, LSA, LexRank, KL, Luhn). (Python 2 code.)
    """

    parser = PlaintextParser.from_file(
        'data/text_summary/' + filename + '.txt', Tokenizer("english"))
    print('=========== Basic Sum ============')
    Basic_Sum_sentences = []
    summarizer = SumBasicSummarizer()
    summary = summarizer(
        parser.document,
        num_summary)  # extract num_summary sentences
    for sentence in summary:
        print sentence
        Basic_Sum_sentences.append(str(sentence))

    print('=========== LSA ============')
    LSA_sentences = []
    summarizer = LsaSummarizer()

    summary = summarizer(
        parser.document,
        num_summary)  # extract num_summary sentences
    for sentence in summary:
        print sentence
        LSA_sentences.append(str(sentence))

    print('===========LexRank============')
    LexRank_sentences = []
    summarizer = LexRankSummarizer()
    summary = summarizer(
        parser.document,
        num_summary)  # extract num_summary sentences
    for sentence in summary:
        print sentence
        LexRank_sentences.append(str(sentence))

    print('===========KL Divergence============')
    KL_sentences = []
    summarizer = KLSummarizer()
    summary = summarizer(
        parser.document,
        num_summary)  # extract num_summary sentences
    for sentence in summary:
        print sentence
        KL_sentences.append(str(sentence))

    print('===========Luhn============')
    Luhn_sentences = []
    summarizer = LuhnSummarizer()
    summary = summarizer(
        parser.document,
        num_summary)  # extract num_summary sentences
    for sentence in summary:
        print sentence
        Luhn_sentences.append(str(sentence))

    return Basic_Sum_sentences, LSA_sentences, LexRank_sentences, KL_sentences, Luhn_sentences
Example #22
0
def run_sumy(text, algo='KL', sent_count=6):
    """Summarize *text* with sumy's KL or LexRank summarizer.

    Args:
        text: plain-text document.
        algo: 'KL' or 'LexRank'.
        sent_count: number of sentences to return.

    Returns:
        The sentences selected by the chosen summarizer.

    Raises:
        ValueError: if *algo* is not a supported algorithm name.
    """
    parser = PlaintextParser.from_string(text, Tokenizer("english"))
    stemmer = Stemmer("english")

    if algo == 'KL':
        summarizer = KLSummarizer(stemmer)
    elif algo == 'LexRank':
        summarizer = LexRankSummarizer(stemmer)
    else:
        # BUG FIX: any other value left `summarizer` unbound and raised
        # a NameError on the next line; fail loudly and clearly instead.
        raise ValueError(f"Unsupported algorithm: {algo!r}")
    summarizer.stop_words = get_stop_words("english")

    summary_list = summarizer(parser.document, sent_count)
    return summary_list
Example #23
0
 def train(self):
     """
     Initialise the summarisation model.

     Sets ``self.model`` to a fresh KLSummarizer. No actual training is
     performed — KL-Sum is an extractive method with no learned state.
     (The previous docstring documented args/returns this method does
     not have.)
     """
     self.model = KLSummarizer()
def get_summarizer():
    """Return the summarizer selected by the module-level
    EXTRACTIVE_SUMMARIZATION_ALGO setting.

    Returns:
        A fresh sumy summarizer instance (no stemmer).

    Raises:
        ValueError: if the configured algorithm name is unknown
        (previously the function silently returned None, deferring the
        failure to the first use of the result).
    """
    if EXTRACTIVE_SUMMARIZATION_ALGO == 'luhn':
        return LuhnSummarizer()
    elif EXTRACTIVE_SUMMARIZATION_ALGO == 'kl':
        return KLSummarizer()
    elif EXTRACTIVE_SUMMARIZATION_ALGO == 'lsa':
        return LsaSummarizer()
    elif EXTRACTIVE_SUMMARIZATION_ALGO == 'textrank':
        return TextRankSummarizer()
    elif EXTRACTIVE_SUMMARIZATION_ALGO == 'lexrank':
        return LexRankSummarizer()
    raise ValueError(
        f"Unknown extractive summarization algorithm: {EXTRACTIVE_SUMMARIZATION_ALGO!r}")
    def __init__(self, language='english'):
        """Build one summarizer per supported algorithm for *language*."""
        stemmer = Stemmer(language)

        self.language = language
        # All algorithms share the same stemmer instance.
        self.algorithms = {
            'kl': KLSummarizer(stemmer),
            'lex_rank': LexRankSummarizer(stemmer),
            'lsa': LsaSummarizer(stemmer),
            'text_rank': TextRankSummarizer(stemmer)
        }
        for summarizer in self.algorithms.values():
            summarizer.stop_words = get_stop_words(language)
Example #26
0
def summarizeKL(text):
    """Two-sentence KL-Sum summary of *text*, one sentence per line."""
    parsed = PlaintextParser.from_string(text, Tokenizer('english'))
    from sumy.summarizers.kl import KLSummarizer
    print('\nKL')
    # Summarize the document with 2 sentences.
    chosen = KLSummarizer()(parsed.document, 2)
    return "".join(str(sent) + '\n' for sent in chosen)
Example #27
0
def summarizeKL():
    """Print two-sentence KL-Sum summaries of the module-level
    ``parser_string`` and ``parser_file`` documents."""
    from sumy.summarizers.kl import KLSummarizer
    print('\nKL')
    summarizer = KLSummarizer()

    # Same 2-sentence summary for both the string and the file source.
    sources = (('\nSummarizing text from string:', parser_string),
               ('\nSummarizing text from .txt file:', parser_file))
    for header, parser in sources:
        print(header)
        for sent in summarizer(parser.document, 2):
            print(sent)
Example #28
0
def evaluate_summary(file_name, input_dir, sent_count, lingua_franca_summary, show_summaries):
    """Score the Lingua Franca summary and six sumy baselines against the
    gold-model summary with ROUGE.

    Args:
        file_name: base name of the document (no extension).
        input_dir: directory holding `<file_name>.txt` and `<file_name>.model`.
        sent_count: number of sentences each baseline extracts.
        lingua_franca_summary: the system summary scored first.
        show_summaries: forwarded to sumy_summarizers for verbose output.

    Returns:
        (df_rouge, summarizer_list) on success, or None on any error
        (the exception is logged, matching the original behaviour).
    """
    method_name = inspect.stack()[0][3]
    try:
        process_logger.debug("in " + method_name + " method")
        # BUG FIX: both files were opened and never closed; context
        # managers guarantee the write is flushed and handles released.
        with open(input_dir + file_name + ".model", "r") as file_model_summary:
            model_summary = file_model_summary.read()

        rouge_scores_dict = {}
        rouge_scores = rouge_evaluation(lingua_franca_summary, model_summary)
        rouge_scores_dict[">>LINGUA FRANCA"] = rouge_scores
        with open("Test System Summary/" + file_name + "-" + "LINGUA FRANCA" + ".txt", "w") as file_summary:
            file_summary.write(lingua_franca_summary)

        LANGUAGE = "english"
        parser = PlaintextParser.from_file(input_dir + file_name + ".txt", Tokenizer(LANGUAGE))
        stemmer = Stemmer(LANGUAGE)

        # Score each sumy baseline with the same document and settings.
        baselines = [
            ("LSA", LsaSummarizer(stemmer)),
            ("LEX RANK", LexRankSummarizer(stemmer)),
            ("LUHN", LuhnSummarizer(stemmer)),
            ("TEXT RANK", TextRankSummarizer(stemmer)),
            ("SUM BASIC", SumBasicSummarizer(stemmer)),
            ("KL SUM", KLSummarizer(stemmer)),
        ]
        for label, summarizer in baselines:
            rouge_scores_dict[label] = sumy_summarizers(
                label, summarizer, parser.document, sent_count,
                model_summary, show_summaries, file_name)

        # score_reader(rouge_scores_dict)
        df_rouge, summarizer_list = process_rouge_scores(rouge_scores_dict)

        return df_rouge, summarizer_list

    except Exception as Ex:
        error_logger.error("Exception occurred in " + method_name + "| Exception:" + str(Ex))
        return None
Example #29
0
def dada_summarize(content: str, title: str = "") -> dict:
    """Summarize *content* with several algorithms.

    Returns a dict mapping algorithm name -> summary text (sentences
    joined with newlines, except the textrank/textteaser/bert entries
    which use each library's native output).
    """
    response = dict()
    content = process_content(content)
    title = process_title(title)

    # textrank [need newline to split sentence]
    response["textrank"] = summarize(content)

    # textteaser [need newline to split sentence]
    sentence_budget = int(len(content.split('\n')) * 0.3)
    response['textteaser'] = "\n".join(tt.summarize(title, content, count=sentence_budget))

    ### sumy — all algorithms share one parsed document
    parser = PlaintextParser.from_string(content, tokenizer)
    sumy_algorithms = (
        ('lsa', LsaSummarizer),
        ('textrank2', TextRankSummarizer),
        ('lexrank', LexRankSummarizer),
        ('reduction', ReductionSummarizer),
        ('kl-sum', KLSummarizer),
    )
    for key, summarizer_cls in sumy_algorithms:
        summarizer = summarizer_cls(stemmer)
        picked = [str(s) for s in summarizer(parser.document, SENTENCES_COUNT)]
        response[key] = "\n".join(picked)

    # bert
    response['bert'] = model(content, ratio=0.4)

    return response
Example #30
0
    def get_summarizers(self, names):
        """Retrieves sumy summarizer algorithms.

            Parameters:
            names (list): list of summarizer algorithm names

            Returns:
            dict: name -> summarizer instance with stop words applied;
            unrecognized names are silently skipped

        """
        summarizers = {}
        # Import lazily inside each branch so only the algorithms that
        # were actually requested get loaded.
        for name in names:
            if name == "random":
                from sumy.summarizers.random import RandomSummarizer
                summarizers["random"] = RandomSummarizer(null_stemmer)
            elif name == "luhn":
                from sumy.summarizers.luhn import LuhnSummarizer
                summarizers["luhn"] = LuhnSummarizer(stemmer=null_stemmer)
            elif name == "lsa":
                from sumy.summarizers.lsa import LsaSummarizer
                summarizers["lsa"] = LsaSummarizer(stemmer=null_stemmer)
            elif name == "lexrank":
                from sumy.summarizers.lex_rank import LexRankSummarizer
                summarizers["lexrank"] = LexRankSummarizer(null_stemmer)
            elif name == "textrank":
                from sumy.summarizers.text_rank import TextRankSummarizer
                summarizers["textrank"] = TextRankSummarizer(null_stemmer)
            elif name == "sumbasic":
                from sumy.summarizers.sum_basic import SumBasicSummarizer
                summarizers["sumbasic"] = SumBasicSummarizer(null_stemmer)
            elif name == "kl-sum":
                from sumy.summarizers.kl import KLSummarizer
                summarizers["kl-sum"] = KLSummarizer(null_stemmer)
            elif name == "reduction":
                from sumy.summarizers.reduction import ReductionSummarizer
                summarizers["reduction"] = ReductionSummarizer(null_stemmer)

        # Every instance gets the same custom stop-word set.
        for _, summarizer in summarizers.items():
            summarizer.stop_words = frozenset(
                self.stop_words._get_stop_words(custom_stop_words=[]))

        return summarizers
Example #31
0
def sumy_summarizer(text, ratio, summarizer_type):
    """Summarize *text*, keeping roughly *ratio* of its sentences.

    Args:
        text: the input document.
        ratio: fraction (0-1) of the '.'-split sentence count to keep.
        summarizer_type: lexrank / reduction / lsa / luhn / kl
            (case-insensitive).

    Returns:
        str: the summary, via convert_to_string.

    Raises:
        ValueError: for an unrecognized *summarizer_type*.
    """
    num_sent = int(len(text.split(".")) * ratio)
    parser = PlaintextParser.from_string(text, Tokenizer("english"))
    # Normalizing once replaces the original per-branch spelling checks
    # (one of which — 'reduction' or 'reduction' — was a duplicate that
    # made the capitalized spelling unreachable).
    summarizer_classes = {
        'lexrank': LexRankSummarizer,
        'reduction': ReductionSummarizer,
        'lsa': LsaSummarizer,
        'luhn': LuhnSummarizer,
        'kl': KLSummarizer,
    }
    try:
        summarizer_instance = summarizer_classes[summarizer_type.lower()]()
    except KeyError:
        # BUG FIX: an unknown type previously left summarizer_instance
        # unbound, raising a confusing NameError below.
        raise ValueError(f"Unknown summarizer type: {summarizer_type!r}")
    summary_values = summarizer_instance(parser.document, num_sent)
    final_summary = [str(sent) for sent in summary_values]
    summary_values = convert_to_string(final_summary)
    return summary_values
def summary_benchmarks(sentences_string):
    '''
    :param sentences_string: all sentences as one string, has been tokenized
    :return: None; prints a 3-sentence summary from each of five sumy
        baselines (SumBasic, LSA, LexRank, KL, Luhn). (Python 2 code.)
    '''
    parser = PlaintextParser.from_string(sentences_string,
                                         Tokenizer("english"))
    print('=========== Basic Sum ============')
    summarizer = SumBasicSummarizer()
    summary = summarizer(parser.document,
                         3)  # extract 3 sentences
    for sentence in summary:
        print sentence

    print('=========== LSA ============')
    summarizer = LsaSummarizer()
    summary = summarizer(parser.document,
                         3)  # extract 3 sentences
    for sentence in summary:
        print sentence

    print('===========LexRank============')
    summarizer = LexRankSummarizer()
    summary = summarizer(parser.document,
                         3)  # extract 3 sentences
    for sentence in summary:
        print sentence

    print('===========KL Divergence============')
    summarizer = KLSummarizer()
    summary = summarizer(parser.document,
                         3)  # extract 3 sentences
    for sentence in summary:
        print sentence

    print('===========Luhn============')
    summarizer = LuhnSummarizer()
    summary = summarizer(parser.document,
                         3)  # extract 3 sentences
    for sentence in summary:
        print sentence
Example #33
0
from __future__ import absolute_import
from __future__ import division, print_function, unicode_literals

from sumy.parsers.html import HtmlParser
from sumy.parsers.plaintext import PlaintextParser
from sumy.nlp.tokenizers import Tokenizer
from sumy.summarizers.lsa import LsaSummarizer 
from sumy.summarizers.kl import KLSummarizer
from sumy.nlp.stemmers import Stemmer
from sumy.utils import get_stop_words
import sys


LANGUAGE = "english"
# NOTE(review): argv is read at import time, so importing this module
# without the two CLI args raises IndexError — confirm script-only usage.
SENTENCES_COUNT = int(sys.argv[2])
text_file = sys.argv[1]


if __name__ == "__main__":
    # Parse the input file and print the top KL-Sum sentences.
    parser = PlaintextParser.from_file(text_file, Tokenizer(LANGUAGE))
    stemmer = Stemmer(LANGUAGE)

    summarizer = KLSummarizer(stemmer)
    summarizer.stop_words = get_stop_words(LANGUAGE)

    for sentence in summarizer(parser.document, SENTENCES_COUNT):
        print(sentence)

Example #34
0
def summarizer(stop_words):
    """Build a KLSummarizer configured with *stop_words*."""
    kl = KLSummarizer()
    kl.stop_words = stop_words
    return kl