def click_summary(self):
    sents = pdf_to_text(self.file_name)
    textrank = TextRank(language=self.language, tokenizer=None, stopwords=STOPWORDS)
    keysents = textrank.summarize(sents, topk=5)
    self.ui.textBrowser.setText("\n".join(keysents))
def summarize4(sents, docs=None):
    if not docs:
        docs = [list(Tokenize(sent)) for sent in sents]
    sim_res = bm25_weights(docs)
    rank = TextRank(sim_res)
    rank.solve()
    top_n_summary = []
    for index in sorted(rank.top_index(3)):
        top_n_summary.append(sents[index])
    return u'。 '.join(top_n_summary).replace('\r', '').replace('\n', '') + u'。'
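A minimal usage sketch for summarize4 above, assuming Tokenize and bm25_weights are importable from the surrounding module; the sentences below are only placeholders:

sample_sents = ['今天天气很好', '我们去公园散步', '公园里人很多']  # hypothetical pre-split sentences
print(summarize4(sample_sents))  # prints the top-ranked sentences joined with '。'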
def get_summary(self, texts, n=3):
    texts = self.get_sentences(texts)
    doc_sents = [jieba.lcut(i) for i in texts]
    rank = TextRank(doc_sents)
    rank.text_rank()
    results = []
    for j in range(len(texts)):
        if j in rank.top_index(n):
            results.append(texts[j])
    summary = "。".join(results) + "。"
    return summary
def summary(self, doc, title=None, use_textrank_keysent=False):
    """Return the text summary and the keywords."""
    # Preprocess the input
    sent_para = split_to_sentence(doc)
    self.sent_list = [
        sent.strip() for sent in chain.from_iterable(sent_para)
    ]
    sent_num = len(self.sent_list)
    with_title = False
    if title:
        self.sent_list.append(title)
        with_title = True
    pos_sent_weight = get_position_weight(sent_para)
    del sent_para
    # Compute embeddings
    sent_vecs, title_vec, doc_vec, total_tokens = self.__cal_sentences_vec_mat(
        with_title)
    # LDA abstracts topics out of the document, which is a separate way of modeling
    # the semantic information, so it is not folded into the sentence-embedding step
    topic_dist, topic_words_dist = self.get_topic_distribution(total_tokens)
    topics_vec = self.__cal_topic_embedding(topic_dist, topic_words_dist)
    # Keywords
    textrank = TextRank()
    self.keywords = textrank.get_keywords(doc)
    # Compute scores
    scores = self.__cal_score(sent_vecs, doc_vec, topics_vec, title_vec,
                              pos_sent_weight, sent_num)
    score_smooth = self.__score_smooth(scores, sent_num)
    # Rank and select
    sorted_idx = np.argsort(score_smooth)[-sent_num // 3:]
    sent_ids = sorted(sorted_idx)
    if self.debug:
        print('key words: ', self.keywords)
        print('position weight: ', pos_sent_weight)
        print('score:', scores)
        print('score smooth:', score_smooth)
        for i in sent_ids:
            print(self.sent_list[i])
    if use_textrank_keysent:
        keysentence = textrank.get_keysentences(doc)
        print('textrank keysentence: ', keysentence)
    return ''.join([self.sent_list[i] for i in sent_ids]), \
        ';'.join([w for w, _ in self.keywords])
def sentence(mongo, redis, tagger, data, bulk_op):
    start_time = time.time()
    logging.debug("sentence process start time : %f" % (start_time))
    singlewords = get_singlewords()
    coef = load_config()['coef']
    title_word_addition_multiplier = load_config()['title_word_addition_multiplier']
    minimum_low_freq = load_config()['minimum_low_freq']
    low_freq_word_subtraction_multiplier = load_config()['low_freq_word_subtraction_multiplier']
    nnp_addition_multiplier = load_config()['nnp_addition_multiplier']
    # extract keywords and sentences using the TextRank algorithm
    for idx, (URI, title, content, root_domain, wordcount) in enumerate(data):
        # get stopwords from redis
        stopwords = get_stopwords(redis, root_domain)
        tr = TextRank(
            tagger=tagger,
            window=5,
            content=content,
            stopwords=stopwords,
            singlewords=singlewords,
            title=title,
            coef=coef,
            title_word_addition_multiplier=title_word_addition_multiplier,
            minimum_low_freq=minimum_low_freq,
            low_freq_word_subtraction_multiplier=low_freq_word_subtraction_multiplier,
            nnp_addition_multiplier=nnp_addition_multiplier)
        # build the sentence graph
        tr.sentence_rank()
        # adjust the summarization rate according to the word count
        summarize_rate = 0.3
        if wordcount < 500:
            summarize_rate = 0.3
        elif wordcount <= 1000:
            summarize_rate = 0.3
        elif wordcount <= 2000:
            summarize_rate = 0.2
        elif wordcount <= 3000:
            summarize_rate = 0.1
        # get the summary sentences
        sentences = tr.sentences(summarize_rate)
        sys.stdout.write("\rsentence extracted: %d / %d" % (idx, len(data)))
        mongo.bulk_insert_sentences(bulk_op, URI, sentences, summarize_rate)
    end_time = time.time()
    logging.debug("sentence process end time : %f" % (end_time))
    logging.debug("total execute time : %f" % (end_time - start_time))
def get_summaries(self, limit=5):
    doc = []
    sentences = self.get_sentences()
    for sentence in sentences:
        words = list(self.seg.seg(sentence))
        words = self.filter_stop(words)
        doc.append(words)
    self.textrank = TextRank(doc)
    self.textrank.solve()
    result = []
    for index in self.textrank.top_index(limit):
        result.append(sentences[index])
    return result
def __init__(self):
    self.textranker = TextRank()
    self.ners = ['PERSON', 'ORG', 'GPE']
    self.ner_dict = {
        'PERSON': 'Person',  # People, including fictional
        'ORG': 'Organization',  # Companies, agencies, institutions, etc.
        'GPE': 'Location',  # Countries, cities, states.
    }
    # dependency markers for subjects
    self.SUBJECTS = {
        "nsubj", "nsubjpass", "csubj", "csubjpass", "agent", "expl"
    }
    # dependency markers for objects
    self.OBJECTS = {"dobj", "dative", "attr", "oprd"}
    self.graph_shower = GraphShow()
def 처지(self):
    tr = TextRank()
    from konlpy.tag import Komoran
    tagger = Komoran()
    stopword = set([('있', 'VV'), ('하', 'VV'), ('되', 'VV')])
    tr.loadSents(
        RawSentenceReader('x.txt'),
        lambda sent: filter(
            lambda x: x not in stopword and x[1] in ('NNG', 'NNP', 'VV', 'VA'),
            tagger.pos(sent)))
    tr.build()
    ranks = tr.rank()
    if tr.summarize(0.4) is None:
        return "모름"  # "unknown"
    else:
        return tr.summarize(0.4)
def summary(self):
    if self.parent.file_path != '':
        out = None
        if str(self.parent.comboBox.currentText()) == 'File':
            self.parent.text = open(self.parent.file_path, 'r').read()
        elif str(self.parent.comboBox.currentText()) == 'URL':
            if str(self.parent.internet_status.text()) == 'OFFLINE':
                return
            self.parent.text = get_text(self.parent.file_path)
        if self.parent.set_algorithm == 'FS':
            out = fs(self.parent.text, self.parent.set_language,
                     int(self.parent.set_count))
        elif self.parent.set_algorithm == 'TextRank':
            tr = TextRank(self.parent.text, int(self.parent.set_count),
                          self.parent.set_language, self.parent.set_metric,
                          self.parent.set_graph)
            out = tr.summarize()
        self.parent.out = out
def keyword(mongo, redis, tagger, data, bulk_op):
    start_time = time.time()
    logging.debug("keyword extraction start time : %f" % (start_time))
    singlewords = get_singlewords()
    coef = load_config()['coef']
    title_word_addition_multiplier = load_config()['title_word_addition_multiplier']
    minimum_low_freq = load_config()['minimum_low_freq']
    nnp_addition_multiplier = load_config()['nnp_addition_multiplier']
    low_freq_word_subtraction_multiplier = load_config()['low_freq_word_subtraction_multiplier']
    for idx, (URI, title, content, root_domain, wordcount) in enumerate(data):
        # get stopwords from redis
        stopwords = get_stopwords(redis, root_domain)
        tr = TextRank(
            tagger=tagger,
            window=5,
            content=content,
            stopwords=stopwords,
            singlewords=singlewords,
            title=title,
            coef=coef,
            title_word_addition_multiplier=title_word_addition_multiplier,
            minimum_low_freq=minimum_low_freq,
            low_freq_word_subtraction_multiplier=low_freq_word_subtraction_multiplier)
        # build the keyword graph
        tr.keyword_rank()
        # get keywords; the number of keywords is capped at 15
        keywords = tr.keywords(num=15)
        sys.stdout.write("\rkeyword extracted: %d / %d" % (idx, len(data)))
        mongo.bulk_insert_keywords(bulk_op, URI, keywords)
    end_time = time.time()
    logging.debug("keyword extraction end time : %f" % (end_time))
    logging.debug("total execution time : %f" % (end_time - start_time))
def analyze():
    if request.method == 'POST':
        payload = request.get_json()
        input_text = payload["text"]
        t = TextRank(input_text)
        t.analyze(50)
        t.generate_cloud().to_file("temp.png")
        return send_file("temp.png", mimetype='image/png')
def 심정(self):
    tr = TextRank(window=5, coef=1)
    stopword = set([('있', 'VV'), ('하', 'VV'), ('되', 'VV'), ('없', 'VV')])
    tr.load(
        RawTaggerReader('x.txt'),
        lambda w: w not in stopword and (w[1] in ('NNG', 'NNP', 'VV', 'VA')))
    tr.build()
    kw = tr.extract(0.4)
    if kw is None:
        return "모름"  # "unknown"
    else:
        return kw
def click_summary(self):
    self.language = str(self.ui.comboBox.currentText())
    self.top_k_word = int(self.ui.comboBox_2.currentText())
    self.top_k_sent = int(self.ui.comboBox_3.currentText())
    sents = pdf_to_text(self.file_name)
    # print(self.language, self.top_k_word, self.top_k_sent)
    if self.language == "ko":
        textrank = TextRank(language=self.language, tokenizer="mecab",
                            stopwords=STOPWORDS)
    else:
        textrank = TextRank(language=self.language, tokenizer=None,
                            stopwords=STOPWORDS)
    keywords = textrank.keywords(sents, topk=self.top_k_word)
    keysents = textrank.summarize(sents, topk=self.top_k_sent)
    self.ui.textBrowser.setText("\n".join(keysents))
    self.ui.textBrowser_2.setText(", ".join(keywords))
    args.tokenizer = None
    # English stopwords
    stopwords = stopwords.words("english")
    stopwords += [",", "-", ":", ";", "!", "?", "'", '"']
else:
    sents = get_data("data/sents.txt", "news")
    # Korean stopwords
    stopwords = ["뉴스", "기자", "그리고", "연합뉴스"]

# initialize TextRank
textrank = TextRank(
    min_count=args.min_count,
    min_sim=args.min_sim,
    tokenizer=args.tokenizer,
    noun=args.noun,
    similarity=args.similarity,
    df=args.df,
    max_iter=args.max_iter,
    method=args.method,
    stopwords=stopwords,
)

# extract sentences or keywords
if args.mode == "sentences":
    results = textrank.summarize(sents, topk=args.topk)
    results = [sent for _, sent in results]
    results = "\n".join(results)
else:
    args.mode = "words"
    results = textrank.keywords(sents, topk=args.topk)
from textrank import TextRank, RawSentenceReader
from konlpy.tag import Kkma
import sys

filename = sys.argv[1]
rate = float(sys.argv[2])
tr = TextRank()
# print('Load...')
from konlpy.tag import Komoran
tagger = Komoran()
stopword = set([('있', 'VV'), ('하', 'VV'), ('되', 'VV')])
tr.loadSents(RawSentenceReader(filename),
             lambda sent: filter(lambda x: x not in stopword and x[1] in ('NNG', 'NNP', 'VV', 'VA'),
                                 tagger.pos(sent)))
# print('Build...')
tr.build()
ranks = tr.rank()
# for k in sorted(ranks, key=ranks.get, reverse=True)[:100]:
#     print("\t".join([str(k), str(ranks[k]), str(tr.dictCount[k])]))
sentence = '%s.' % (tr.summarize(rate).split('. ')[0])
kkma = Kkma()
print(sentence)
print(list(x[0] for x in (list(filter(lambda x: x[1][0] == 'N', kkma.pos(sentence))))))
import jieba
# from bm25 import BM25
from textrank import TextRank
import utils
from snownlp import seg
from sys import argv

fact = argv[1]
# sample input (a Chinese criminal-case description) for testing:
# fact = '公诉机关指控:2016年3月28日20时许,被告人颜某在本市洪山区马湖新村足球场马路边捡拾到被害人谢某的VIVOX5手机一部,' \
#        '并在同年3月28日2、1时起,分多次通过支付宝小额免密支付功能,秘密盗走被害人谢某支付宝内人民币3723元。案发后,被告人颜某家属已赔偿被害人全部损失,' \
#        '并取得谅解。公诉机关认为被告人颜某具有退赃、取得谅解、自愿认罪等处罚情节,建议判处被告人颜某一年以下××、××或者××,并处罚金。'

if __name__ == '__main__':
    sents = utils.get_sentences(fact)
    doc = []
    for sent in sents:
        words = seg.seg(sent)
        # words = list(jieba.cut(sent))
        words = utils.filter_stop(words)
        doc.append(words)
    # print(doc)
    # s = BM25(doc)
    # print(s.f)
    # print(s.df)
    # print(s.idf)
    rank = TextRank(doc)
    rank.text_rank()
    for index in rank.top_index(3):
        print(sents[index])
import os
import json

import responder

from textrank import TextRank

env = os.environ
DEBUG = env['DEBUG'] in ['1', 'True', 'true']
RATIO = float(env['RATIO'])
MODEL = env.get('MODEL')

api = responder.API(debug=DEBUG)
textrank = TextRank(env['LIBRARY'], MODEL)


@api.route("/")
async def get_keywords(req, resp):
    body = await req.text
    text_list = json.loads(body)
    keywords_list = [textrank.keywords(text, RATIO) for text in text_list]
    resp_dict = dict(data=keywords_list)
    resp.media = resp_dict


if __name__ == "__main__":
    api.run()
from textrank import TextRank
from article import get_text

text = get_text('https://www.bbc.com/news/world-us-canada-47848619')
tr = TextRank(text, lang='english', metric='log', graph='HITS')
tr.summarize()
'''
Shunichi Suzuki, who had been Olympics minister before Mr Sakurada was appointed last October, will return to the post.
In February Mr Sakurada had to make another apology, after arriving three minutes late to a parliamentary meeting.
"I deeply apologise for his remark to the people in the disaster-hit areas," said Mr Abe.
It is not the first time Mr Sakurada has been forced to apologise.
After accepting Mr Sakurada's resignation, Prime Minister Shinzo Abe apologised for appointing him.
Image copyright AFP Japan's Olympics Minister Yoshitaka Sakurada has resigned over comments that offended people affected by a huge tsunami and earthquake in 2011.
Mr Sakurada also admitted last year to never having used a computer, despite being Japan's cyber security minister.
Shunichi Suzuki, who had been Olympics minister before Mr Sakurada was appointed last October, will return to the post.
"I deeply apologise for his remark to the people in the disaster-hit areas," said Mr Abe.
It is not the first time Mr Sakurada has been forced to apologise.
After accepting Mr Sakurada's resignation, Prime Minister Shinzo Abe apologised for appointing him.
Image copyright AFP Japan's Olympics Minister Yoshitaka Sakurada has resigned over comments that offended people affected by a huge tsunami and earthquake in 2011.
The 2011 tsunami left more than 20,000 dead and caused a meltdown at the Fukushima Daiichi nuclear plant.
Mr Sakurada also admitted last year to never having used a computer, despite being Japan's cyber security minister.
'''
class NewsMining():
    """News Mining"""

    def __init__(self):
        self.textranker = TextRank()
        self.ners = ['PERSON', 'ORG', 'GPE']
        self.ner_dict = {
            'PERSON': 'Person',  # People, including fictional
            'ORG': 'Organization',  # Companies, agencies, institutions, etc.
            'GPE': 'Location',  # Countries, cities, states.
        }
        # dependency markers for subjects
        self.SUBJECTS = {
            "nsubj", "nsubjpass", "csubj", "csubjpass", "agent", "expl"
        }
        # dependency markers for objects
        self.OBJECTS = {"dobj", "dative", "attr", "oprd"}
        self.graph_shower = GraphShow()

    def clean_spaces(self, s):
        s = s.replace('\r', '')
        s = s.replace('\t', ' ')
        s = s.replace('\n', ' ')
        return s

    def remove_noisy(self, content):
        """Remove bracketed content (full-width and ASCII parentheses)"""
        p1 = re.compile(r'（[^）]*）')
        p2 = re.compile(r'\([^\)]*\)')
        return p2.sub('', p1.sub('', content))

    def collect_ners(self, ents):
        """Collect tokens only with PERSON, ORG, GPE labels"""
        collected_ners = []
        for token in ents:
            if token.label_ in self.ners:
                collected_ners.append(token.text + '/' + token.label_)
        return collected_ners

    def conll_syntax(self, sent):
        """Convert one sentence to conll format."""
        tuples = list()
        for word in sent:
            if word.head is word:
                head_idx = 0
            else:
                head_idx = word.head.i + 1
            tuples.append([
                word.i + 1,  # Current word index, begins with 1
                word.text,  # Word
                word.lemma_,  # Lemma
                word.pos_,  # Coarse-grained tag
                word.tag_,  # Fine-grained tag
                '_',
                head_idx,  # Head of current index
                word.dep_,  # Relation
                '_',
                '_'
            ])
        return tuples

    def syntax_parse(self, sent):
        """Convert one sentence to conll format."""
        tuples = list()
        for word in sent:
            if word.head is word:
                head_idx = 0
            else:
                head_idx = word.head.i + 1
            tuples.append([
                word.i + 1,  # Current word index, begins with 1
                word.text,  # Word
                word.pos_,  # Coarse-grained tag
                word.head,
                head_idx,  # Head of current index
                word.dep_,  # Relation
            ])
        return tuples

    def build_parse_chile_dict(self, sent, tuples):
        child_dict_list = list()
        for word in sent:
            child_dict = dict()
            for arc in tuples:
                if arc[3] == word:
                    if arc[-1] in child_dict:
                        child_dict[arc[-1]].append(arc)
                    else:
                        child_dict[arc[-1]] = []
                        child_dict[arc[-1]].append(arc)
            child_dict_list.append([word, word.pos_, word.i, child_dict])
        return child_dict_list

    def complete_VOB(self, verb, child_dict_list):
        '''Find VOB by SBV'''
        for child in child_dict_list:
            word = child[0]
            # child_dict: {'dobj': [[7, 'startup', 'NOUN', buying, 5, 'dobj']], 'prep': [[8, 'for', 'ADP', buying, 5, 'prep']]}
            child_dict = child[3]
            if word == verb:
                for object_type in self.OBJECTS:  # object_type: 'dobj'
                    if object_type not in child_dict:
                        continue
                    # [7, 'startup', 'NOUN', buying, 5, 'dobj']
                    vob = child_dict[object_type][0]
                    obj = vob[1]  # 'startup'
                    return obj
        return ''

    def extract_triples(self, sent):
        svo = []
        tuples = self.syntax_parse(sent)
        child_dict_list = self.build_parse_chile_dict(sent, tuples)
        for tuple in tuples:
            rel = tuple[-1]
            if rel in self.SUBJECTS:
                sub_wd = tuple[1]
                verb_wd = tuple[3]
                obj = self.complete_VOB(verb_wd, child_dict_list)
                subj = sub_wd
                verb = verb_wd.text
                if not obj:
                    svo.append([subj, verb])
                else:
                    svo.append([subj, verb + ' ' + obj])
        return svo

    def extract_keywords(self, words_postags):
        return self.textranker.extract_keywords(words_postags, 10)

    def collect_coexist(self, ner_sents, ners):
        """Construct NER co-occurrence matrices"""
        co_list = []
        for words in ner_sents:
            co_ners = set(ners).intersection(set(words))
            co_info = self.combination(list(co_ners))
            co_list += co_info
        if not co_list:
            return []
        return {i[0]: i[1] for i in Counter(co_list).most_common()}

    def combination(self, a):
        '''List all ordered pair combinations'''
        combines = []
        if len(a) == 0:
            return []
        for i in a:
            for j in a:
                if i == j:
                    continue
                combines.append('@'.join([i, j]))
        return combines

    def main(self, content):
        '''Main function'''
        if not content:
            return []
        words_postags = []  # tokens and their POS tags
        ner_sents = []  # store sentences which contain an NER entity
        ners = []  # store all NER entities from the whole article
        triples = []  # store subject-verb-object triples
        events = []  # store events

        # 01 remove line breaks and brackets
        content = self.remove_noisy(content)
        content = self.clean_spaces(content)

        # 02 split into sentences
        doc = nlp(content)
        for i, sent in enumerate(doc.sents):
            words_postags = [[token.text, token.pos_] for token in sent]
            words = [token.text for token in sent]
            postags = [token.pos_ for token in sent]
            ents = nlp(sent.text).ents  # NER detection
            collected_ners = self.collect_ners(ents)

            if collected_ners:
                # only extract triples when the sentence contains 'PERSON', 'ORG', 'GPE'
                triple = self.extract_triples(sent)
                if not triple:
                    continue
                triples += triple
                ners += collected_ners
                ner_sents.append(
                    [token.text + '/' + token.label_ for token in sent.ents])

        # 03 get keywords
        keywords = [i[0] for i in self.extract_keywords(words_postags)]
        for keyword in keywords:
            name = keyword
            cate = 'keyword'
            events.append([name, cate])

        # 04 add a triple to events only when one of its words is a keyword
        for t in triples:
            if (t[0] in keywords or t[1] in keywords) and len(t[0]) > 1 and len(t[1]) > 1:
                events.append([t[0], t[1]])

        # 05 get word frequencies and add them to events
        word_dict = [
            i for i in Counter([
                i[0] for i in words_postags
                if i[1] in ['NOUN', 'PROPN', 'VERB'] and len(i[0]) > 1
            ]).most_common()
        ][:10]
        for wd in word_dict:
            name = wd[0]
            cate = 'frequency'
            events.append([name, cate])

        # 06 get NER entities from the whole article
        ner_dict = {i[0]: i[1] for i in Counter(ners).most_common(20)}
        for ner in ner_dict:
            name = ner.split('/')[0]  # e.g. Jessica Miller
            cate = self.ner_dict[ner.split('/')[1]]  # e.g. PERSON
            events.append([name, cate])

        # 07 get all NER entity co-occurrence information
        # here ner_dict comes from step 06 above
        co_dict = self.collect_coexist(ner_sents, list(ner_dict.keys()))
        co_events = [[
            i.split('@')[0].split('/')[0],
            i.split('@')[1].split('/')[0]
        ] for i in co_dict]
        events += co_events

        # 08 show the event graph
        self.graph_shower.create_page(events)
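A minimal usage sketch for the NewsMining class above, assuming it runs in the same module as the class (which reads a module-level `nlp` spaCy pipeline); the model name and input file are only placeholders:

import spacy

nlp = spacy.load("en_core_web_sm")  # `main` expects a module-level spaCy pipeline named `nlp`

miner = NewsMining()
with open("article.txt", encoding="utf-8") as f:  # hypothetical input article
    miner.main(f.read())  # builds keyword/NER/triple events and renders the event graph page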
parser = argparse.ArgumentParser()
parser.add_argument("--infer_path", default=None, type=str)
parser.add_argument("--save_path", default='./output/', type=str)
parser.add_argument("--use_textrank", default=0, type=int)
parser.add_argument('--max_len', type=int, default=512, help='max seq len')
parser.add_argument('--col_name', type=str, default='text_original', help='column name')
args = parser.parse_args()

if args.use_textrank:
    print('use textrank')
    tr = TextRank()
    data = tr.predict(args.infer_path)
else:
    print('use kobart only')
    data = pd.read_csv(args.infer_path)
    # data['article_concat'] = data.article_original.apply(concat)


# use the pretrained KoBART model
def load_model():
    model = BartForConditionalGeneration.from_pretrained('./kobart_summary')
    return model


model = load_model()
tokenizer = get_kobart_tokenizer()
# coding: utf-8
from textrank import TextRank  # import the textrank module

f = open("text.txt", 'r', encoding='utf-8')  # stopwords template / input text
text = f.read()
tr = TextRank(text)  # run textrank
f.close()

i = 1
for row in tr.summarize(3):  # print the summarized sentences and keywords
    print(str(i) + '. ' + row)
    i += 1
print('keywords :', tr.keywords())
# output
parser.add_argument("--output_path", type=str, required=True, help="directory for results")
args = parser.parse_args()

if __name__ == "__main__":
    # initialize TextRank
    model = TextRank(
        min_count=args.min_count,
        min_sim=args.min_sim,
        tokenizer=args.tokenizer,
        noun=args.noun,
        similarity=args.similarity,
        df=args.df,
        method=args.method,
        stopwords=None,
    )
    data = get_data(args.test_path)

    output_path = args.output_path
    hyp_path = f"{output_path}/hyp"
    abs_ref_path = f"{output_path}/abs_ref"
    if not os.path.exists(output_path):
        os.makedirs(output_path)
    if not os.path.exists(hyp_path):
        os.makedirs(hyp_path)
def textrank(text):
    return TextRank(text=text)
from textrank import TextRank, RawTaggerReader
import sys

filename = sys.argv[1]
rate = float(sys.argv[2])
tr = TextRank(window=5, coef=1)
# print('Load...')
stopword = set([('있', 'VV'), ('하', 'VV'), ('되', 'VV'), ('없', 'VV')])
tr.load(RawTaggerReader(filename),
        lambda w: w not in stopword and (w[1] in ('NNG', 'NNP', 'VV', 'VA')))
# print('Build...')
tr.build()
kw = tr.extract(rate)
for k in sorted(kw, key=kw.get, reverse=True):
    text = ''
    for i in range(len(k)):
        text = '%s %s' % (text, k[i][0])
    text = '%s %f' % (text, kw[k])
    print(text)
class Order:
    def __init__(self, text, seg=None, tagger=None):
        self.text = text
        self.tagger = tagger if tagger is not None else self.get_tagger()
        self.seg = seg if seg is not None else self.get_seg()
        self.words_merge = None

    def get_keywords(self, limit=5, merge=False):
        doc = []
        sentences = self.get_sentences()
        for sentence in sentences:
            words = list(self.seg.seg(sentence))
            words = self.filter_stop(words)
            doc.append(words)
        self.keywordrank = KeywordRank(doc)
        self.keywordrank.solve()
        result = []
        for w in self.keywordrank.top_index(limit):
            result.append(w)
        if merge:
            wm = self.words_merge.merge(self.text, result)
            return wm.merge()
        return result

    def get_summaries(self, limit=5):
        doc = []
        sentences = self.get_sentences()
        for sentence in sentences:
            words = list(self.seg.seg(sentence))
            words = self.filter_stop(words)
            doc.append(words)
        self.textrank = TextRank(doc)
        self.textrank.solve()
        result = []
        for index in self.textrank.top_index(limit):
            result.append(sentences[index])
        return result

    def get_sentences(self):
        line_break_re = re.compile('[\r\n]')
        delimiter_re = re.compile('[,。?!;]')
        sentences = []
        for line in line_break_re.split(self.text):
            line = line.strip()
            if not line:
                continue
            for sentence in delimiter_re.split(line):
                sentence = sentence.strip()
                if not sentence:
                    continue
                sentences.append(sentence)
        return sentences

    def get_seg(self, fname='seg.pickle'):
        seg = Seg()
        seg.load(fname)
        return seg

    def get_tagger(self, fname='tag.pickle'):
        tagger = Tag()
        tagger.load(fname)
        return tagger

    def filter_stop(self, words):
        return list(filter(lambda x: x not in stop_words, words))
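A minimal usage sketch for the Order class above, assuming the pickled seg.pickle/tag.pickle models are available in the working directory; the input text is only a placeholder:

doc_text = '今天天气很好,我们去公园散步。公园里人很多,大家都在晒太阳。'  # hypothetical input
order = Order(doc_text)
print(order.get_keywords(limit=5))   # top-5 keywords via KeywordRank
print(order.get_summaries(limit=2))  # top-2 key sentences via TextRank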
from textrank import TextRank
import sys
import os
from flask import Flask, request, render_template, send_file

app = Flask(__name__)


@app.route('/')
def main():
    return render_template('index.html')


@app.route('/analyze', methods=['POST'])
def analyze():
    if request.method == 'POST':
        payload = request.get_json()
        input_text = payload["text"]
        t = TextRank(input_text)
        t.analyze(50)
        t.generate_cloud().to_file("temp.png")
        return send_file("temp.png", mimetype='image/png')


if __name__ == '__main__':
    input_file = open(sys.argv[1])
    t = TextRank(input_file.read(), iterations=100)
    t.analyze(30)
    t.generate_cloud()
    # app.config['TEMPLATES_AUTO_RELOAD'] = True
    # app.run(host='0.0.0.0', port=8080)