def simTextRank(filePath, summarySentenceCount):
    '''
    Generate a summary using the similarity between sentences.

    The text is preprocessed with cleanText(), a graph is built over the
    sentence ids with generateGraph(), and the nodes are ranked with
    networkx.pagerank(). The best-ranked sentences are written, one per
    line, to 'sim_textrank.txt' inside app.config['DOWNLOAD_FOLDER'].

    :param filePath: path handed to cleanText() for preprocessing
    :param summarySentenceCount: upper bound on the number of sentences
        kept in the summary (used as a slice bound)
    :return: None; the summary is only written to disk
    '''
    global sentence_dictionary
    # Reset the shared dictionary before parsing. Kept from the original
    # because helpers defined elsewhere may read this global -- TODO confirm.
    sentence_dictionary = {}
    sentence_dictionary, sentences, _ = cleanText(filePath)

    # Graph nodes are the sentence ids (the dictionary keys).
    graph = generateGraph(list(sentence_dictionary.keys()))
    # pagerank() returns a dictionary mapping each node to its score.
    pageRank = networkx.pagerank(graph)

    # Pick the highest-scoring ids first, then sort the survivors by id so
    # the summary lists them in ascending id order rather than score order.
    bestIDs = sorted(pageRank, key=pageRank.get,
                     reverse=True)[:summarySentenceCount]
    output = "\n".join(sentences[sentenceID] for sentenceID in sorted(bestIDs))

    summaryPath = os.path.join(app.config['DOWNLOAD_FOLDER'],
                               'sim_textrank.txt')
    # The with-statement closes the file; no explicit close() is needed.
    with open(summaryPath, "w", encoding="utf-8") as outFile:
        outFile.write(output)
def process(arg1, arg2, arg3):
    '''
    Run the keyphrase-based summarization pipeline on one input file.

    The parsed text and the tuning parameters are published through
    module-level globals, after which generatepositionaldistribution(),
    textrank() and summarize() are called in that order. If any argument
    is None, "not enough parameters" is printed and nothing else happens.

    :param arg1: path to the file containing the text to be summarized
    :param arg2: number of sentences to be extracted as summary
    :param arg3: size of the window to be used in the co-occurrence relation
    :raises ValueError: if cleanText() reports a size <= 0, for which the
        logarithm used below is undefined; errors from int(arg2) and
        int(arg3) propagate unchanged
    :return: None
    '''
    global window, n, numberofSentences, textRank, sentenceDictionary, size, sentences

    # Identity test against None: '!= None' can be redefined by the operand.
    if arg1 is None or arg2 is None or arg3 is None:
        print("not enough parameters")
        return

    sentenceDictionary, sentences, size = cleanText(arg1)
    window = int(arg3)
    numberofSentences = int(arg2)

    # math.log() rejects non-positive values with a bare "math domain
    # error"; raise the same exception type with a message that names the
    # offending input instead.
    if size <= 0:
        raise ValueError(
            "cannot summarize {!r}: cleanText() reported size {!r}".format(
                arg1, size))

    # n = ceil(min(10% of size, 7 * ln(size))).
    # NOTE(review): presumably the number of keyphrases consumed by
    # textrank() through the global -- confirm against that function.
    n = int(math.ceil(min(0.1 * size, 7 * math.log(size))))

    generatepositionaldistribution()
    keyphrases = textrank()
    summarize(arg1, keyphrases, numberofSentences)
def textRankSimilarity(filePath, summarySentenceCount):
    '''
    Summarize a file by ranking its sentences on mutual similarity.

    The text is preprocessed with cleanText(), a graph is built over the
    sentence ids with generateGraph(), and the nodes are ranked with
    networkx.pagerank(). The best-ranked sentences are printed and also
    written, one per line, to
    "../Marathi/summaries/<name>_TextRankSimilaritySummarizer", where
    <name> is the part of filePath after its last '/'.

    :param filePath: path handed to cleanText(); its last '/'-separated
        component names the output file
    :param summarySentenceCount: upper bound on the number of sentences
        kept in the summary (used as a slice bound)
    :return: None; the summary is printed and written to disk
    '''
    global sentenceDictionary
    # Reset the shared dictionary before parsing. Kept from the original
    # because helpers defined elsewhere may read this global -- TODO confirm.
    sentenceDictionary = {}
    sentenceDictionary, sentences, _ = cleanText(filePath)

    # Graph nodes are the sentence ids (the dictionary keys).
    graph = generateGraph(list(sentenceDictionary.keys()))
    pageRank = networkx.pagerank(graph)

    # Pick the highest-scoring ids first, then sort the survivors by id so
    # the summary lists them in ascending id order rather than score order.
    bestIDs = sorted(pageRank, key=pageRank.get,
                     reverse=True)[:summarySentenceCount]
    output = "\n".join(sentences[sentenceID] for sentenceID in sorted(bestIDs))

    print("\nSummary:")
    print(output)

    summaryPath = ("../Marathi/summaries/" + filePath.split('/')[-1] +
                   "_TextRankSimilaritySummarizer")
    # The with-statement closes the file; no explicit close() is needed.
    with io.open(summaryPath, "w", encoding='utf-8') as outFile:
        outFile.write(output)