예제 #1
0
def cal_textrank(window, alpha):
    """Run TextRank over the corpus file and dump the results to disk.

    Reads ``./original/corpus1.txt``, removes newlines and surrounding
    whitespace, feeds the text through the ``TextRank`` pipeline and writes
    one line per result item to ``./textrank.txt``.

    Args:
        window: Value converted with ``int()`` and passed to ``TextRank``
            as its second argument.
        alpha: Value converted with ``float()`` and passed to ``TextRank``
            as its third argument.

    Returns:
        None. The output is the file ``./textrank.txt``.
    """
    window_size = int(window)
    alpha_value = float(alpha)

    with open('./original/corpus1.txt', 'r', encoding='utf-8') as corpus_file:
        corpus = corpus_file.read().replace('\n', '').strip()

    # NOTE(review): the meaning of the constant 700 is defined by TextRank,
    # which is not visible here -- confirm before changing it.
    ranker = TextRank(corpus, window_size, alpha_value, 700)
    ranker.cutSentence()
    ranker.createNodes()
    ranker.createMatrix()
    ranker.calPR()
    ranker.output_matrix()
    ranked_items = ranker.printResult()

    # Deleting "(", ")" and "'" from str(item) flattens the item's printed
    # form into plain comma-separated text.
    strip_table = str.maketrans('', '', '()\'')

    # Build every line before opening the output file so that a failure
    # here never leaves a truncated ./textrank.txt behind.
    lines = [
        str(ranker.word_index[entry[0]]) + ',' + str(entry).translate(strip_table) + '\n'
        for entry in ranked_items
    ]

    with open('./textrank.txt', 'w', encoding='utf-8') as out_file:
        out_file.write(''.join(lines))