def simTextRank(filePath, summarySentenceCount):
    '''
    Generate an extractive summary using similarity between sentences.

    :param filePath: path to the file containing the text to summarize
    :param summarySentenceCount: number of sentences to extract
    '''
    global sentence_dictionary
    # cleanText does all preprocessing and returns the sentence-id dictionary,
    # the raw sentences (indexable by id), and the text size.
    sentence_dictionary, sentences, size = cleanText(filePath)

    # Nodes are sentence ids; edges carry inter-sentence similarity weights.
    graph = generateGraph(list(sentence_dictionary.keys()))

    # PageRank scores for each node; return type is a dict {sentence_id: score}.
    pageRank = networkx.pagerank(graph)

    # Pick the top-scored sentence ids, then re-sort them by id so the
    # summary preserves the original document order.
    topIDs = sorted(pageRank, key=pageRank.get, reverse=True)[:summarySentenceCount]
    output = "\n".join(sentences[sentenceID] for sentenceID in sorted(topIDs))

    # The with-statement closes the file; no explicit close() needed.
    with open(os.path.join(app.config['DOWNLOAD_FOLDER'], 'sim_textrank.txt'),
              "w",
              encoding="utf-8") as outFile:
        outFile.write(output)
# 예제 #2 (Example #2) — marker left over from the site this file was scraped
# from; commented out (with the stray "0" below) because the bare text is a
# Python syntax error.
# 0
def process(arg1, arg2, arg3):
    '''
    Summarize a text file using keyphrase-based TextRank.

    :param arg1: path to the file containing the text to be summarized
    :param arg2: number of sentences to be extracted as summary
    :param arg3: size of the window to be used in the co-occurrence relation
    '''
    global window, n, numberofSentences, textRank, sentenceDictionary, size, sentences
    # Guard clause: all three parameters are required (use `is None`, not `== None`).
    if arg1 is None or arg2 is None or arg3 is None:
        print("not enough parameters")
        return

    sentenceDictionary, sentences, size = cleanText(arg1)
    window = int(arg3)
    numberofSentences = int(arg2)
    # Keyphrase count: capped by both 10% of the text size and 7*ln(size).
    n = int(math.ceil(min(0.1 * size, 7 * math.log(size))))
    generatepositionaldistribution()
    keyphrases = textrank()
    summarize(arg1, keyphrases, numberofSentences)
def textRankSimilarity(filePath, summarySentenceCount):
    '''
    Summarize *filePath* via PageRank over a sentence-similarity graph,
    print the summary, and write it to the Marathi summaries folder.

    :param filePath: path to the input text file
    :param summarySentenceCount: number of sentences to extract
    '''
    global sentenceDictionary
    # cleanText returns the id dictionary, the sentences (indexable by id),
    # and the text size; the previous dead initializations were removed.
    sentenceDictionary, sentences, size = cleanText(filePath)

    graph = generateGraph(list(sentenceDictionary.keys()))
    pageRank = networkx.pagerank(graph)

    # Top-ranked sentence ids, re-sorted by id to keep document order.
    topIDs = sorted(pageRank, key=pageRank.get, reverse=True)[:summarySentenceCount]
    output = "\n".join(sentences[sentenceID] for sentenceID in sorted(topIDs))

    print("\nSummary:")
    print(output)

    # NOTE(review): splitting on '/' assumes POSIX-style paths;
    # os.path.basename would be more portable but is kept as-is to
    # preserve behavior on the target setup. The with-statement closes
    # the file; no explicit close() needed.
    with io.open("../Marathi/summaries/" + (filePath).split('/')[-1] +
                 "_TextRankSimilaritySummarizer",
                 "w",
                 encoding='utf-8') as outFile:
        outFile.write(output)