def base_vectorize(self, index, link):
    try:
        Basesummarizes = []
        print(link)

        # Summarize the page behind `link` and extract its keywords.
        textrank = TextRank.TextRank(link)
        summarizes = textrank.summarize(10)
        keywords = textrank.keywords()

        for sentence in summarizes:
            Basesummarizes.append(sentence)

        # Also keep any sentence whose tokens match the search keyword.
        for sentence in textrank.sentences:
            for word in sentence.split(" "):
                if word in self.__keyword:
                    Basesummarizes.append(sentence)
                    break

        # Abort when the search keyword is absent from the extracted keywords.
        flag = 0
        for keyword in keywords:
            if keyword in self.__keyword:
                flag = 1
                break
        if flag == 0:
            print("The search keyword is not among the extracted keywords.")
            return

        # Build the base document vector; the base's distance to itself is 0.
        self.__validation.sum_str(
            self.__sentenceTokenizer.get_nouns(Basesummarizes))
        self.__validation.set_dic(index, 0)
    except Exception as e:
        print(e)
        print('textrank not working')
        return

    self.printCommand(index, link, summarizes, keywords)
    self.__linkDict[index] = link
    self.__sentenceDict[index] = summarizes
    self.__keywordDict[index] = keywords
    self.__distanceDict = self.__validation.get_dic()
    self.__observer.resultToGui()
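# The nested scan above keeps any sentence containing a whitespace-split token
# that appears in the search term. A standalone sketch of that same selection,
# assuming self.__keyword is the search string (names here are illustrative):

def select_matching_sentences(sentences, keyword):
    """Keep sentences containing a token that appears in the search string."""
    return [s for s in sentences if any(w in keyword for w in s.split(" "))]

# select_matching_sentences(["textrank ranks sentences", "unrelated line"], "textrank")
# -> ['textrank ranks sentences']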
def cal_textrank(window, alpha):
    # Optionally filter against a stopword list (停用词表.txt):
    # with open('停用词表.txt', 'r', encoding='utf-8') as ban:
    #     banlist = ban.read().splitlines()
    win = int(window)
    alpha = float(alpha)

    # Read the corpus as one continuous string.
    with open('./original/corpus1.txt', 'r', encoding='utf-8') as f:
        s = f.read().replace('\n', '').strip()

    # Build the co-occurrence graph and run PageRank.
    tr = TextRank(s, win, alpha, 700)
    tr.cutSentence()
    tr.createNodes()
    tr.createMatrix()
    tr.calPR()
    tr.output_matrix()
    res = tr.printResult()

    # Write one "index,word,score" row per ranked word.
    textrank = ''
    for item in res:
        # if item[0].strip() in banlist:
        #     continue
        s = (str(tr.word_index[item[0]]) + ',' +
             str(item).replace('(', '').replace(')', '').replace('\'', '') + '\n')
        textrank += s
    with open('./textrank.txt', 'w', encoding='utf-8') as w:
        w.write(textrank)
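# A usage sketch for cal_textrank: it takes its parameters as strings and casts
# them internally, so GUI or CLI input can be passed through unchanged. Assumes
# the corpus file exists at the hard-coded ./original/corpus1.txt path:

cal_textrank('5', '0.85')  # co-occurrence window of 5 words, damping factor 0.85
with open('./textrank.txt', 'r', encoding='utf-8') as f:
    print(f.read().splitlines()[:10])  # top-ranked rows: "index,word, score"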
def target_vectorize(self, targetIndex, targetLink):
    try:
        # Summarize the target page and extract its keywords.
        textrank = TextRank.TextRank(targetLink)
        summarizes = textrank.summarize(10)
        keywords = textrank.keywords()

        # Abort when the search keyword is absent from the extracted keywords.
        flag = 0
        for keyword in keywords:
            if keyword in self.__keyword:
                flag = 1
                break
        if flag == 0:
            print("The search keyword is not among the extracted keywords.")
            return

        # Vectorize the target and measure its distance from the base document.
        self.__validation.target_vectorizing(
            self.__sentenceTokenizer.get_nouns(summarizes))
        distance = self.__validation.dist_norm()
        if math.isnan(distance):  # requires `import math` at module level
            raise ValueError
        self.__validation.set_dic(targetIndex, distance)
    except Exception as e:
        print(e)
        print('textrank not working')
        return

    self.printCommand(targetIndex, targetLink, summarizes, keywords, distance)
    self.__linkDict[targetIndex] = targetLink
    self.__sentenceDict[targetIndex] = summarizes
    self.__keywordDict[targetIndex] = keywords
    self.__distanceDict = self.__validation.get_dic()
    self.__observer.resultToGui()
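# The NaN guard above matters because cosine-style distances are undefined when
# either vector has zero norm. A minimal sketch of one plausible dist_norm,
# assuming it compares base and target noun-frequency vectors (this
# implementation is an assumption, not the actual Validation code):

import math

def dist_norm(base_vec, target_vec):
    """Cosine distance between two equal-length frequency vectors.

    Returns NaN for a zero-norm vector, which the caller above
    converts into a ValueError.
    """
    dot = sum(a * b for a, b in zip(base_vec, target_vec))
    norms = math.sqrt(sum(a * a for a in base_vec)) * math.sqrt(sum(b * b for b in target_vec))
    return float('nan') if norms == 0 else 1 - dot / norms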
nerInPyltp = loadNerDictFromPyltp('pyltp_savebox.txt')
partOfSpeechDict = loadWordsPartOfSpeech("spdict.txt")
nerDict = loadPreTrainEntityDict('lexiconAndNerDictWithInfo.txt')

# Open the training dataset
f = codecs.open("coreEntityEmotion_train.txt", 'r', 'utf-8')

# Set up the output files
outputname = "entityOutPut_originCut-pyltp_full_v3"
fout = codecs.open(outputname + ".txt", 'w', 'utf-8')
fout_cache = codecs.open(outputname + "_datacache.txt", 'w', 'utf-8')

# Load TextRank
trDemo = TextRank.TextRank()

# Analysis loop
i = 0
for rawline in f.readlines():
    # Parse one JSON record per line
    rawline_json = json.loads(rawline)
    # Get the title
    titleline = rawline_json['title']
    # Collect the labeled core entities
    entity = set()
    eec = rawline_json["coreEntityEmotions"]
    for key in eec:
        entity.add(key["entity"])
    # Segment the title into words
    titleWords = segmentor.segment(titleline)
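# The loop assumes one JSON object per line with at least the two fields it
# reads, 'title' and 'coreEntityEmotions'. An illustrative record (field
# values hypothetical) and the set-comprehension form of the entity collection:

import json

sample = '{"title": "...", "coreEntityEmotions": [{"entity": "ExampleCorp", "emotion": "POS"}]}'
record = json.loads(sample)
entities = {e["entity"] for e in record["coreEntityEmotions"]}  # equivalent to the entity.add loop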
except:  # guards the int() conversion of the user's compression input
    print("Input must be a natural number 0-100!")
    continue
if not (compression > 0 and compression < 100):
    print("Out of bounds, try again")
    continue  # re-prompt instead of proceeding with an invalid rate

# Convert the compression rate into a sentence budget.
num_of_sentences = int((compression / 100) * article_dict["LENGTH"])
if num_of_sentences == 0:
    print("The desired compression rate for this article resulted in a zero sentence summary. Please try"
          " again with a higher rate of compression")
    exit()

# Score every sentence with all three extractors and sum the scores.
edmundson = Edmundson(article_dict)
rhetoric = ExtractedArticle(article_dict)
textrank = TextRank(article_dict["BODY"])
master_scores = list(map(sum, zip(edmundson.get_sent_scores(custom_settings),
                                  rhetoric.get_sent_scores(custom_settings),
                                  textrank.get_sent_scores())))

# Take the top-scoring sentences, then restore document order.
preliminary_indices = sorted(range(len(master_scores)), key=lambda i: master_scores[i])[-num_of_sentences:]
master_indices = sorted(preliminary_indices)

print("Display Summary: \n")
for index in master_indices:
    print(article_dict["BODY"][index])
    summary += article_dict["BODY"][index]
summary = (summary + "\n\nThis summary was generated using: " + active_pickle_file + "\n"
           + "Source shrunk from " + str(article_dict['LENGTH']) + ' sentences to '
           + str(num_of_sentences) + " sentences (" + str(compression) + "%)")
os.chdir(Summarypath)
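# Note on the combination step: summing the three scorers' outputs assumes they
# are on comparable scales. If they are not, one option (not in the original
# code) is to min-max normalize each score list before zipping:

def min_max(scores):
    """Scale a score list into [0, 1]; a constant list maps to all zeros."""
    lo, hi = min(scores), max(scores)
    return [0.0] * len(scores) if hi == lo else [(s - lo) / (hi - lo) for s in scores]

# Hypothetical drop-in:
# master_scores = list(map(sum, zip(min_max(edmundson.get_sent_scores(custom_settings)),
#                                   min_max(rhetoric.get_sent_scores(custom_settings)),
#                                   min_max(textrank.get_sent_scores()))))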