def 처지(self):
    """Summarize the text in ``x.txt`` with TextRank.

    Sentences are read through ``RawSentenceReader('x.txt')`` and tokenized
    with Komoran. Only tokens tagged ``NNG``, ``NNP``, ``VV`` or ``VA`` that
    are not in the local stopword set are handed to TextRank.

    Returns:
        The value of ``tr.summarize(0.4)``, or the string ``"모름"`` when
        that value is ``None``.
    """
    # Kept as a function-scope import, as in the original block.
    from konlpy.tag import Komoran

    tr = TextRank()
    tagger = Komoran()
    stopword = {('있', 'VV'), ('하', 'VV'), ('되', 'VV')}
    kept_tags = ('NNG', 'NNP', 'VV', 'VA')

    def tokenize(sent):
        # Returns a lazy filter object, exactly as the original lambda did.
        return filter(
            lambda x: x not in stopword and x[1] in kept_tags,
            tagger.pos(sent))

    tr.loadSents(RawSentenceReader('x.txt'), tokenize)
    tr.build()
    # The ranking result is not used here; the call is kept in case
    # summarize() relies on it having run -- TODO confirm against TextRank.
    tr.rank()

    # Summarize once and reuse the result instead of computing it twice.
    summary = tr.summarize(0.4)
    if summary is None:
        return "모름"
    return summary
def clck_summary(self):
    """Show the five top-ranked sentences of the current file in the text browser.

    Text is obtained with ``pdf_to_text(self.file_name)``, ranked by a
    ``TextRank`` built for ``self.language`` with no tokenizer and the
    module's ``STOPWORDS``, and the result is written newline-joined to
    ``self.ui.textBrowser``.
    """
    document_sentences = pdf_to_text(self.file_name)
    ranker = TextRank(
        language=self.language,
        tokenizer=None,
        stopwords=STOPWORDS,
    )
    top_sentences = ranker.summarize(document_sentences, topk=5)
    summary_text = "\n".join(top_sentences)
    self.ui.textBrowser.setText(summary_text)
def summary(self):
    """Summarize the parent's current source and store the result on the parent.

    Does nothing when ``self.parent.file_path`` is empty. Otherwise the
    source text is loaded into ``self.parent.text`` according to the combo
    box selection:

    * ``'File'`` -- the file at ``file_path`` is read.
    * ``'URL'``  -- ``get_text(file_path)`` is called, unless the internet
      status label reads ``'OFFLINE'``, in which case the method returns
      without touching ``self.parent.out``.

    The text is then summarized with the algorithm named by
    ``self.parent.set_algorithm`` (``'FS'`` or ``'TextRank'``) and the
    result is written to ``self.parent.out``; any other algorithm name
    stores ``None``.
    """
    parent = self.parent
    if parent.file_path == '':
        return

    source_kind = str(parent.comboBox.currentText())
    if source_kind == 'File':
        # The context manager closes the handle even if read() raises;
        # the original left the file object unclosed.
        with open(parent.file_path, 'r') as source_file:
            parent.text = source_file.read()
    elif source_kind == 'URL':
        if str(parent.internet_status.text()) == 'OFFLINE':
            return
        parent.text = get_text(parent.file_path)

    out = None
    if parent.set_algorithm == 'FS':
        out = fs(parent.text, parent.set_language, int(parent.set_count))
    elif parent.set_algorithm == 'TextRank':
        tr = TextRank(
            parent.text,
            int(parent.set_count),
            parent.set_language,
            parent.set_metric,
            parent.set_graph,
        )
        out = tr.summarize()
    parent.out = out
def click_summary(self):
    """Run TextRank on the current file and show key sentences and keywords.

    The language and the two top-k values are read from the UI combo boxes
    and stored on ``self``. Text comes from ``pdf_to_text(self.file_name)``.
    Key sentences go to ``self.ui.textBrowser`` (newline-joined) and
    keywords go to ``self.ui.textBrowser_2`` (comma-joined).
    """
    self.language = str(self.ui.comboBox.currentText())
    self.top_k_word = int(self.ui.comboBox_2.currentText())
    self.top_k_sent = int(self.ui.comboBox_3.currentText())

    document_sentences = pdf_to_text(self.file_name)

    # "ko" selects the "mecab" tokenizer; every other language passes None.
    chosen_tokenizer = "mecab" if self.language == "ko" else None
    ranker = TextRank(
        language=self.language,
        tokenizer=chosen_tokenizer,
        stopwords=STOPWORDS,
    )

    top_words = ranker.keywords(document_sentences, topk=self.top_k_word)
    top_sentences = ranker.summarize(document_sentences, topk=self.top_k_sent)

    sentence_text = "\n".join(top_sentences)
    self.ui.textBrowser.setText(sentence_text)
    keyword_text = ", ".join(top_words)
    self.ui.textBrowser_2.setText(keyword_text)
# Command-line script: summarize a text file with TextRank, print the first
# sentence of the summary, then print the tokens of that sentence whose Kkma
# tag starts with 'N'.
#
# Arguments: argv[1] is the input file name, argv[2] is a float passed to
# tr.summarize().
import sys

from konlpy.tag import Kkma, Komoran
from textrank import TextRank, RawSentenceReader

filename = sys.argv[1]
rate = float(sys.argv[2])

tr = TextRank()
tagger = Komoran()
stopword = {('있', 'VV'), ('하', 'VV'), ('되', 'VV')}


def _keep_token(x):
    # Keep tokens with one of the four listed tags that are not stopwords.
    return x not in stopword and x[1] in ('NNG', 'NNP', 'VV', 'VA')


def _tokenize(sent):
    # Lazy filter over Komoran's (token, tag) pairs.
    return filter(_keep_token, tagger.pos(sent))


tr.loadSents(RawSentenceReader(filename), _tokenize)
tr.build()
ranks = tr.rank()

# Take the summary text up to the first '. ' and close it with a period.
first_part = tr.summarize(rate).split('. ')[0]
sentence = f'{first_part}.'

kkma = Kkma()
print(sentence)
# Tokens of the sentence whose Kkma tag begins with 'N'.
print([token[0] for token in kkma.pos(sentence) if token[1][0] == 'N'])
min_sim=args.min_sim, tokenizer=args.tokenizer, noun=args.noun, similarity=args.similarity, df=args.df, method=args.method, stopwords=None, ) data = get_data(args.test_path) output_path = args.output_path hyp_path = f"{output_path}/hyp" abs_ref_path = f"{output_path}/abs_ref" if not os.path.exists(output_path): os.makedirs(output_path) if not os.path.exists(hyp_path): os.makedirs(hyp_path) if not os.path.exists(abs_ref_path): os.makedirs(abs_ref_path) for articles in tqdm(data): doc_id, sents, gold = articles hyp = model.summarize(sents, args.topk) with open(f"{abs_ref_path}/{doc_id}.txt", "w", encoding="utf8") as f: f.write(gold) with open(f"{hyp_path}/{doc_id}.txt", "w", encoding="utf8") as f: f.write(hyp)
# coding: utf-8
# Script: read text.txt, run TextRank on it, and print a three-item summary
# followed by the keywords.
from textrank import TextRank  # load the textrank module

# Read the whole input document. The context manager closes the file even if
# a later step raises; the original closed it only after TextRank(text)
# returned successfully.
with open("text.txt", 'r', encoding='utf-8') as f:
    text = f.read()

tr = TextRank(text)  # run TextRank on the document

# Print the summarized sentences, numbered from 1, then the keywords.
for i, row in enumerate(tr.summarize(3), start=1):
    print(f"{i}. {row}")
print('keywords :', tr.keywords())
# Example: fetch a BBC news article and summarize it with TextRank.
from textrank import TextRank
from article import get_text

# Article text is obtained from the URL through the project's get_text helper.
text = get_text('https://www.bbc.com/news/world-us-canada-47848619')
# English text, 'log' metric, 'HITS' graph option.
tr = TextRank(text, lang='english', metric='log', graph='HITS')
# NOTE(review): the return value is neither bound nor printed here, so this
# only shows output in an interactive session -- confirm that is intended.
tr.summarize()
# The bare string below is a no-op expression; it looks like sample output
# pasted from earlier runs -- TODO confirm.
''' Shunichi Suzuki, who had been Olympics minister before Mr Sakurada was appointed last October, will return to the post. In February Mr Sakurada had to make another apology, after arriving three minutes late to a parliamentary meeting. "I deeply apologise for his remark to the people in the disaster-hit areas," said Mr Abe. It is not the first time Mr Sakurada has been forced to apologise. After accepting Mr Sakurada's resignation, Prime Minister Shinzo Abe apologised for appointing him. Image copyright AFP Japan's Olympics Minister Yoshitaka Sakurada has resigned over comments that offended people affected by a huge tsunami and earthquake in 2011. Mr Sakurada also admitted last year to never having used a computer, despite being Japan's cyber security minister. Shunichi Suzuki, who had been Olympics minister before Mr Sakurada was appointed last October, will return to the post. "I deeply apologise for his remark to the people in the disaster-hit areas," said Mr Abe. It is not the first time Mr Sakurada has been forced to apologise. After accepting Mr Sakurada's resignation, Prime Minister Shinzo Abe apologised for appointing him. Image copyright AFP Japan's Olympics Minister Yoshitaka Sakurada has resigned over comments that offended people affected by a huge tsunami and earthquake in 2011. The 2011 tsunami left more than 20,000 dead and caused a meltdown at the Fukushima Daiichi nuclear plant. Mr Sakurada also admitted last year to never having used a computer, despite being Japan's cyber security minister. '''
else: sents = get_data("data/sents.txt", "news") # stopwords of korean stopwords = ["뉴스", "기자", "그리고", "연합뉴스"] # initialize Textrank textrank = TextRank( min_count=args.min_count, min_sim=args.min_sim, tokenizer=args.tokenizer, noun=args.noun, similarity=args.similarity, df=args.df, max_iter=args.max_iter, method=args.method, stopwords=stopwords, ) # extraction setences or keywords if args.mode == "sentences": results = textrank.summarize(sents, topk=args.topk) results = [sent for _, sent in results] results = "\n".join(results) else: args.mode = "words" results = textrank.keywords(sents, topk=args.topk) print(f"{args.mode}") print("=" * 20) print(f"{results}")