Python TextRank.summarize Exemples, textrank.TextRank.summarize Python Exemples

Exemple #1

0

Afficher le fichier

 def 처지(self):
     """Summarize ``x.txt`` with TextRank and return the summary.

     Sentences are read from ``x.txt`` through ``RawSentenceReader`` and
     tokenized with Komoran. Only (morpheme, tag) pairs tagged NNG, NNP,
     VV or VA that are not in the stopword set are passed on to TextRank.

     Returns:
         Whatever ``tr.summarize(0.4)`` returns, or the literal string
         "모름" when that result is ``None``.
     """
     tr = TextRank()
     from konlpy.tag import Komoran
     tagger = Komoran()
     stopword = {('있', 'VV'), ('하', 'VV'), ('되', 'VV')}
     # The tokenizer deliberately returns the lazy ``filter`` object, exactly
     # as the loader has always received it.
     tr.loadSents(
         RawSentenceReader('x.txt'), lambda sent: filter(
             lambda x: x not in stopword and x[1] in
             ('NNG', 'NNP', 'VV', 'VA'), tagger.pos(sent)))
     tr.build()
     # The returned ranks are not needed here; the call itself is kept in
     # case summarize() relies on state set by rank() -- TODO confirm.
     tr.rank()
     # Summarize once and reuse the result instead of recomputing it for the
     # return value.
     summary = tr.summarize(0.4)
     if summary is None:
         return "모름"
     return summary

Exemple #2

0

Afficher le fichier

Fichier : textrank_v01.py Projet : ExcelsiorCJH/textrank

    def clck_summary(self):
        """Summarize the PDF at ``self.file_name`` and display the result.

        The output of ``pdf_to_text`` is handed to a ``TextRank`` built for
        ``self.language`` (no tokenizer, module ``STOPWORDS``). The five
        top-ranked sentences are joined with newlines and written to
        ``self.ui.textBrowser``.
        """
        document = pdf_to_text(self.file_name)

        ranker = TextRank(
            language=self.language,
            tokenizer=None,
            stopwords=STOPWORDS,
        )
        top_sentences = ranker.summarize(document, topk=5)

        display_text = "\n".join(top_sentences)
        self.ui.textBrowser.setText(display_text)

Exemple #3

0

Afficher le fichier

Fichier : ats.py Projet : agikarasugi/FS-n-TextRank-Text-Summarizer

    def summary(self):
        """Load the input text and store its summary on ``self.parent.out``.

        Nothing happens when ``self.parent.file_path`` is the empty string.
        Otherwise the text source is chosen by the combo box:

        * ``'File'`` -- ``file_path`` is read from disk into ``parent.text``.
        * ``'URL'``  -- ``get_text(file_path)`` fills ``parent.text``; when
          the internet status label reads ``'OFFLINE'`` the method returns
          early and ``parent.out`` is left untouched.

        ``parent.set_algorithm`` then selects ``fs`` (``'FS'``) or
        ``TextRank`` (``'TextRank'``). Any other value stores ``None`` in
        ``parent.out``.
        """
        parent = self.parent
        if parent.file_path == '':
            return

        source = str(parent.comboBox.currentText())
        if source == 'File':
            # Context manager so the handle is closed even if read() fails.
            with open(parent.file_path, 'r') as handle:
                parent.text = handle.read()
        elif source == 'URL':
            if str(parent.internet_status.text()) == 'OFFLINE':
                return
            parent.text = get_text(parent.file_path)

        out = None
        if parent.set_algorithm == 'FS':
            out = fs(parent.text, parent.set_language,
                     int(parent.set_count))
        elif parent.set_algorithm == 'TextRank':
            tr = TextRank(parent.text, int(parent.set_count),
                          parent.set_language, parent.set_metric,
                          parent.set_graph)
            out = tr.summarize()

        parent.out = out

Exemple #4

0

Afficher le fichier

Fichier : textrank_v02.py Projet : ExcelsiorCJH/textrank

    def click_summary(self):
        """Read the UI settings, summarize the PDF and show the results.

        The language and the two top-k values are read from the three combo
        boxes and stored on ``self``. The text from ``pdf_to_text`` is then
        passed to a ``TextRank`` that uses the "mecab" tokenizer when the
        language is "ko" and no tokenizer otherwise. Key sentences go to
        ``textBrowser`` (newline separated) and keywords to
        ``textBrowser_2`` (comma separated).
        """
        self.language = str(self.ui.comboBox.currentText())
        self.top_k_word = int(self.ui.comboBox_2.currentText())
        self.top_k_sent = int(self.ui.comboBox_3.currentText())

        document = pdf_to_text(self.file_name)

        # Only the tokenizer differs between the two language branches.
        tokenizer_name = "mecab" if self.language == "ko" else None
        ranker = TextRank(
            language=self.language,
            tokenizer=tokenizer_name,
            stopwords=STOPWORDS,
        )

        top_words = ranker.keywords(document, topk=self.top_k_word)
        top_sentences = ranker.summarize(document, topk=self.top_k_sent)

        self.ui.textBrowser.setText("\n".join(top_sentences))
        self.ui.textBrowser_2.setText(", ".join(top_words))

Exemple #5

0

Afficher le fichier

from textrank import TextRank, RawSentenceReader
from konlpy.tag import Kkma
import sys

# Command line: <input file> <summary ratio>
filename = sys.argv[1]
rate = float(sys.argv[2])

tr = TextRank()
from konlpy.tag import Komoran
tagger = Komoran()
stopword = {('있', 'VV'), ('하', 'VV'), ('되', 'VV')}


def _content_tokens(sent):
    """Return a lazy filter over ``tagger.pos(sent)``.

    Keeps the items that are not in ``stopword`` and whose second element
    is one of NNG, NNP, VV or VA.
    """
    return filter(
        lambda pair: pair not in stopword
        and pair[1] in ('NNG', 'NNP', 'VV', 'VA'),
        tagger.pos(sent),
    )


tr.loadSents(RawSentenceReader(filename), _content_tokens)
tr.build()
ranks = tr.rank()

# Keep only the text before the first '. ' of the summary and terminate it
# with a period.
first_sentence = tr.summarize(rate).split('. ')[0]
sentence = '%s.' % first_sentence

kkma = Kkma()
print(sentence)
# Print the first element of every Kkma item whose tag begins with 'N'.
print([pair[0] for pair in kkma.pos(sentence) if pair[1][0] == 'N'])

Exemple #6

0

Afficher le fichier

Fichier : test.py Projet : taebbang/study

        min_sim=args.min_sim,
        tokenizer=args.tokenizer,
        noun=args.noun,
        similarity=args.similarity,
        df=args.df,
        method=args.method,
        stopwords=None,
    )

    data = get_data(args.test_path)

    output_path = args.output_path
    hyp_path = f"{output_path}/hyp"
    abs_ref_path = f"{output_path}/abs_ref"

    if not os.path.exists(output_path):
        os.makedirs(output_path)
    if not os.path.exists(hyp_path):
        os.makedirs(hyp_path)
    if not os.path.exists(abs_ref_path):
        os.makedirs(abs_ref_path)

    for articles in tqdm(data):
        doc_id, sents, gold = articles

        hyp = model.summarize(sents, args.topk)

        with open(f"{abs_ref_path}/{doc_id}.txt", "w", encoding="utf8") as f:
            f.write(gold)
        with open(f"{hyp_path}/{doc_id}.txt", "w", encoding="utf8") as f:
            f.write(hyp)

Exemple #7

0

Afficher le fichier

# coding: utf-8
from textrank import TextRank  #textrank 모듈 불러오기

# Read the whole input document. The context manager closes the file even
# when reading raises.
with open("text.txt", 'r', encoding='utf-8') as f:
    text = f.read()
tr = TextRank(text)  # run TextRank on the text

# Print the summary sentences numbered from 1, then the keywords.
for i, row in enumerate(tr.summarize(3), start=1):
    print(str(i) + '. ' + row)
print('keywords :', tr.keywords())

Exemple #8

0

Afficher le fichier

from textrank import TextRank
from article import get_text

# Obtain the text for the article URL via get_text(), build a TextRank over
# it and run summarize(). The value returned by summarize() is not stored
# or printed here.
article_url = 'https://www.bbc.com/news/world-us-canada-47848619'
text = get_text(article_url)
tr = TextRank(
    text,
    lang='english',
    metric='log',
    graph='HITS',
)
tr.summarize()


'''
Shunichi Suzuki, who had been Olympics minister before Mr Sakurada was appointed last October, will return to the post.
In February Mr Sakurada had to make another apology, after arriving three minutes late to a parliamentary meeting.
"I deeply apologise for his remark to the people in the disaster-hit areas," said Mr Abe.
It is not the first time Mr Sakurada has been forced to apologise.
After accepting Mr Sakurada's resignation, Prime Minister Shinzo Abe apologised for appointing him.
Image copyright AFP  Japan's Olympics Minister Yoshitaka Sakurada has resigned over comments that offended people affected by a huge tsunami and earthquake in 2011.
Mr Sakurada also admitted last year to never having used a computer, despite being Japan's cyber security minister.

Shunichi Suzuki, who had been Olympics minister before Mr Sakurada was appointed last October, will return to the post.
"I deeply apologise for his remark to the people in the disaster-hit areas," said Mr Abe.
It is not the first time Mr Sakurada has been forced to apologise.
After accepting Mr Sakurada's resignation, Prime Minister Shinzo Abe apologised for appointing him.
Image copyright AFP  Japan's Olympics Minister Yoshitaka Sakurada has resigned over comments that offended people affected by a huge tsunami and earthquake in 2011.
The 2011 tsunami left more than 20,000 dead and caused a meltdown at the Fukushima Daiichi nuclear plant.
Mr Sakurada also admitted last year to never having used a computer, despite being Japan's cyber security minister.

'''

Exemple #9

0

Afficher le fichier

Fichier : main.py Projet : ExcelsiorCJH/textrank

    else:
        sents = get_data("data/sents.txt", "news")
        # stopwords of korean
        stopwords = ["뉴스", "기자", "그리고", "연합뉴스"]

    # initialize Textrank
    textrank = TextRank(
        min_count=args.min_count,
        min_sim=args.min_sim,
        tokenizer=args.tokenizer,
        noun=args.noun,
        similarity=args.similarity,
        df=args.df,
        max_iter=args.max_iter,
        method=args.method,
        stopwords=stopwords,
    )

    # extract sentences or keywords
    if args.mode == "sentences":
        results = textrank.summarize(sents, topk=args.topk)
        results = [sent for _, sent in results]
        results = "\n".join(results)
    else:
        args.mode = "words"
        results = textrank.keywords(sents, topk=args.topk)

    print(f"{args.mode}")
    print("=" * 20)
    print(f"{results}")